Skip to content

Commit cb5dc37

Browse files
committed
TableGen/GlobalISel: Fix constraining REG_SEQUENCE operands
This was hitting the default instruction constraint code which uses the register classes in the instruction def, which REG_SEQUENCE does not have. Fixes not constraining the register class for AMDGPU fneg/fabs patterns, which would fail when the use was another generic, unconstrained instruction. Another oddity I noticed is that the temporary registers are created with an unnecessary, but incorrect 16-bit LLT but this shouldn't matter. I'm also still unclear why root and sub-instructions have to be handled differently.
1 parent 229e392 commit cb5dc37

File tree

4 files changed

+95
-28
lines changed

4 files changed

+95
-28
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -201,11 +201,11 @@ body: |
201201
; GCN-LABEL: name: fabs_s64_ss
202202
; GCN: liveins: $sgpr0_sgpr1
203203
; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
204-
; GCN: [[FABS:%[0-9]+]]:sreg_64(s64) = G_FABS [[COPY]]
205-
; GCN: $sgpr0_sgpr1 = COPY [[FABS]](s64)
204+
; GCN: [[FABS:%[0-9]+]]:sgpr(s64) = G_FABS [[COPY]]
205+
; GCN: S_ENDPGM 0, implicit [[FABS]](s64)
206206
%0:sgpr(s64) = COPY $sgpr0_sgpr1
207207
%1:sgpr(s64) = G_FABS %0
208-
$sgpr0_sgpr1 = COPY %1
208+
S_ENDPGM 0, implicit %1
209209
...
210210

211211
---
@@ -225,10 +225,10 @@ body: |
225225
; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
226226
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
227227
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1
228-
; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]]
228+
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
229229
%0:vgpr(s64) = COPY $vgpr0_vgpr1
230230
%1:vgpr(s64) = G_FABS %0
231-
$vgpr0_vgpr1 = COPY %1
231+
S_ENDPGM 0, implicit %1
232232
...
233233

234234
---
@@ -243,9 +243,9 @@ body: |
243243
; GCN-LABEL: name: fabs_s64_vs
244244
; GCN: liveins: $sgpr0_sgpr1
245245
; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
246-
; GCN: [[FABS:%[0-9]+]]:vreg_64(s64) = G_FABS [[COPY]]
247-
; GCN: $vgpr0_vgpr1 = COPY [[FABS]](s64)
246+
; GCN: [[FABS:%[0-9]+]]:vgpr(s64) = G_FABS [[COPY]]
247+
; GCN: S_ENDPGM 0, implicit [[FABS]](s64)
248248
%0:sgpr(s64) = COPY $sgpr0_sgpr1
249249
%1:vgpr(s64) = G_FABS %0
250-
$vgpr0_vgpr1 = COPY %1
250+
S_ENDPGM 0, implicit %1
251251
...

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fneg.mir

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -206,10 +206,10 @@ body: |
206206
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
207207
; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
208208
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_XOR_B32_]], %subreg.sub1
209-
; GCN: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]]
209+
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
210210
%0:sgpr(s64) = COPY $sgpr0_sgpr1
211211
%1:sgpr(s64) = G_FNEG %0
212-
$sgpr0_sgpr1 = COPY %1
212+
S_ENDPGM 0, implicit %1
213213
...
214214

215215
---
@@ -229,10 +229,10 @@ body: |
229229
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
230230
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
231231
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_XOR_B32_e32_]], %subreg.sub1
232-
; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]]
232+
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
233233
%0:vgpr(s64) = COPY $vgpr0_vgpr1
234234
%1:vgpr(s64) = G_FNEG %0
235-
$vgpr0_vgpr1 = COPY %1
235+
S_ENDPGM 0, implicit %1
236236
...
237237

238238
---
@@ -247,11 +247,12 @@ body: |
247247
; GCN-LABEL: name: fneg_s64_vs
248248
; GCN: liveins: $sgpr0_sgpr1
249249
; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
250-
; GCN: [[FNEG:%[0-9]+]]:vreg_64(s64) = G_FNEG [[COPY]]
251-
; GCN: $vgpr0_vgpr1 = COPY [[FNEG]](s64)
250+
; GCN: [[FNEG:%[0-9]+]]:vgpr(s64) = G_FNEG [[COPY]]
251+
; GCN: S_ENDPGM 0, implicit [[FNEG]](s64)
252252
%0:sgpr(s64) = COPY $sgpr0_sgpr1
253253
%1:vgpr(s64) = G_FNEG %0
254-
$vgpr0_vgpr1 = COPY %1
254+
S_ENDPGM 0, implicit %1
255+
255256
...
256257

257258
---
@@ -268,11 +269,11 @@ body: |
268269
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
269270
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
270271
; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY]], [[S_MOV_B32_]], implicit-def $scc
271-
; GCN: $sgpr0 = COPY [[S_OR_B32_]]
272+
; GCN: S_ENDPGM 0, implicit [[S_OR_B32_]]
272273
%0:sgpr(s32) = COPY $sgpr0
273274
%1:sgpr(s32) = G_FABS %0
274275
%2:sgpr(s32) = G_FNEG %1
275-
$sgpr0 = COPY %2
276+
S_ENDPGM 0, implicit %2
276277
...
277278

278279
---
@@ -289,11 +290,11 @@ body: |
289290
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
290291
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
291292
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
292-
; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]]
293+
; GCN: S_ENDPGM 0, implicit [[V_XOR_B32_e32_]]
293294
%0:vgpr(s32) = COPY $vgpr0
294295
%1:vgpr(s32) = G_FABS %0
295296
%2:vgpr(s32) = G_FNEG %0
296-
$vgpr0 = COPY %2
297+
S_ENDPGM 0, implicit %2
297298
...
298299

299300
---
@@ -311,11 +312,11 @@ body: |
311312
; GCN: [[FABS:%[0-9]+]]:vgpr_32(s32) = G_FABS [[COPY]]
312313
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32(s16) = S_MOV_B32 2147483648
313314
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(s32) = V_XOR_B32_e32 [[S_MOV_B32_]](s16), [[FABS]](s32), implicit $exec
314-
; GCN: $vgpr0 = COPY [[V_XOR_B32_e32_]](s32)
315+
; GCN: S_ENDPGM 0, implicit [[V_XOR_B32_e32_]](s32)
315316
%0:sgpr(s32) = COPY $sgpr0
316317
%1:vgpr(s32) = G_FABS %0
317318
%2:vgpr(s32) = G_FNEG %1
318-
$vgpr0 = COPY %2
319+
S_ENDPGM 0, implicit %2
319320
...
320321

321322
---
@@ -472,11 +473,11 @@ body: |
472473
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
473474
; GCN: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
474475
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_OR_B32_]], %subreg.sub1
475-
; GCN: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]]
476+
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
476477
%0:sgpr(s64) = COPY $sgpr0_sgpr1
477478
%1:sgpr(s64) = G_FABS %0
478479
%2:sgpr(s64) = G_FNEG %1
479-
$sgpr0_sgpr1 = COPY %2
480+
S_ENDPGM 0, implicit %2
480481
...
481482

482483
---
@@ -496,11 +497,11 @@ body: |
496497
; GCN: [[V_OR_B32_e32_:%[0-9]+]]:vgpr_32 = V_OR_B32_e32 [[COPY1]], [[V_MOV_B32_e32_]], implicit $exec
497498
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
498499
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[V_OR_B32_e32_]], %subreg.sub1
499-
; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]]
500+
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
500501
%0:vgpr(s64) = COPY $vgpr0_vgpr1
501502
%1:vgpr(s64) = G_FABS %0
502503
%2:vgpr(s64) = G_FNEG %1
503-
$vgpr0_vgpr1 = COPY %2
504+
S_ENDPGM 0, implicit %2
504505
...
505506

506507
---
@@ -521,9 +522,9 @@ body: |
521522
; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32(s16) = V_XOR_B32_e32 [[COPY1]](s32), [[V_MOV_B32_e32_]](s32), implicit $exec
522523
; GCN: [[COPY2:%[0-9]+]]:vgpr_32(s32) = COPY [[FABS]].sub0(s64)
523524
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64(s64) = REG_SEQUENCE [[COPY2]](s32), %subreg.sub0, [[V_XOR_B32_e32_]](s16), %subreg.sub1
524-
; GCN: $vgpr0_vgpr1 = COPY [[REG_SEQUENCE]](s64)
525+
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]](s64)
525526
%0:sgpr(s64) = COPY $sgpr0_sgpr1
526527
%1:vgpr(s64) = G_FABS %0
527528
%2:vgpr(s64) = G_FNEG %1
528-
$vgpr0_vgpr1 = COPY %2
529+
S_ENDPGM 0, implicit %2
529530
...

llvm/test/TableGen/GlobalISelEmitterRegSequence.td

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,26 @@ def SUBSOME_INSN : I<(outs SRegs:$dst), (ins SOP:$src), []>;
5656
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/1, /*TempRegFlags*/0,
5757
// CHECK-NEXT: GIR_AddImm, /*InsnID*/0, /*SubRegIndex*/2,
5858
// CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0,
59-
// CHECK-NEXT: GIR_ConstrainSelectedInstOperands, /*InsnID*/0,
59+
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC DRegs*/1,
60+
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, /*RC SRegs*/0,
61+
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/3, /*RC SRegs*/0,
6062
def : Pat<(i32 (sext SOP:$src)),
6163
(REG_SEQUENCE DRegs, (SUBSOME_INSN SOP:$src), sub0,
6264
(SUBSOME_INSN SOP:$src), sub1)>;
65+
66+
67+
// CHECK: GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_ZEXT,
68+
// CHECK: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::REG_SEQUENCE,
69+
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/RegState::Define,
70+
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/1, /*TempRegFlags*/0,
71+
// CHECK-NEXT: GIR_AddImm, /*InsnID*/1, /*SubRegIndex*/1,
72+
// CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/2, /*TempRegFlags*/0,
73+
// CHECK-NEXT: GIR_AddImm, /*InsnID*/1, /*SubRegIndex*/2,
74+
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, /*RC DRegs*/1,
75+
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, /*RC SRegs*/0,
76+
// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/3, /*RC SRegs*/0,
77+
// CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SOME_INSN,
78+
// Make sure operands are constrained when REG_SEQUENCE isn't the root instruction.
79+
def : Pat<(i32 (zext SOP:$src)),
80+
(SOME_INSN (REG_SEQUENCE DRegs, (SUBSOME_INSN SOP:$src), sub0,
81+
(SUBSOME_INSN SOP:$src), sub1))>;

llvm/utils/TableGen/GlobalISelEmitter.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4281,6 +4281,29 @@ GlobalISelEmitter::createAndImportSubInstructionRenderer(
42814281
return InsertPtOrError.get();
42824282
}
42834283

4284+
if (OpName == "REG_SEQUENCE") {
4285+
auto SuperClass = inferRegClassFromPattern(Dst->getChild(0));
4286+
M.insertAction<ConstrainOperandToRegClassAction>(
4287+
InsertPt, DstMIBuilder.getInsnID(), 0, **SuperClass);
4288+
4289+
unsigned Num = Dst->getNumChildren();
4290+
for (unsigned I = 1; I != Num; I += 2) {
4291+
TreePatternNode *SubRegChild = Dst->getChild(I + 1);
4292+
4293+
auto SubIdx = inferSubRegIndexForNode(SubRegChild);
4294+
if (!SubIdx)
4295+
return failedImport("REG_SEQUENCE child is not a subreg index");
4296+
4297+
const auto SrcRCDstRCPair =
4298+
(*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx);
4299+
assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass");
4300+
M.insertAction<ConstrainOperandToRegClassAction>(
4301+
InsertPt, DstMIBuilder.getInsnID(), I, *SrcRCDstRCPair->second);
4302+
}
4303+
4304+
return InsertPtOrError.get();
4305+
}
4306+
42844307
M.insertAction<ConstrainOperandsToDefinitionAction>(InsertPt,
42854308
DstMIBuilder.getInsnID());
42864309
return InsertPtOrError.get();
@@ -4934,6 +4957,30 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
49344957
return std::move(M);
49354958
}
49364959

4960+
if (DstIName == "REG_SEQUENCE") {
4961+
auto SuperClass = inferRegClassFromPattern(Dst->getChild(0));
4962+
4963+
M.addAction<ConstrainOperandToRegClassAction>(0, 0, **SuperClass);
4964+
4965+
unsigned Num = Dst->getNumChildren();
4966+
for (unsigned I = 1; I != Num; I += 2) {
4967+
TreePatternNode *SubRegChild = Dst->getChild(I + 1);
4968+
4969+
auto SubIdx = inferSubRegIndexForNode(SubRegChild);
4970+
if (!SubIdx)
4971+
return failedImport("REG_SEQUENCE child is not a subreg index");
4972+
4973+
const auto SrcRCDstRCPair =
4974+
(*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx);
4975+
4976+
M.addAction<ConstrainOperandToRegClassAction>(0, I,
4977+
*SrcRCDstRCPair->second);
4978+
}
4979+
4980+
++NumPatternImported;
4981+
return std::move(M);
4982+
}
4983+
49374984
M.addAction<ConstrainOperandsToDefinitionAction>(0);
49384985

49394986
// We're done with this pattern! It's eligible for GISel emission; return it.

0 commit comments

Comments
 (0)