Skip to content

Commit 1e815ce

Browse files
authored
[AMDGPU] Use SDNodeXForm to select a few VOP3P modifiers, NFC (#151907)
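It is not necessary to use ComplexPattern to select VOP3PModsNeg, VOP3PModsNegs and VOP3PModsNegAbs. Each of these selectors only translates an immediate operand into a source-modifier constant and always succeeds, so there is nothing to match; we can use SDNodeXForm instead.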
1 parent 5514e5e commit 1e815ce

File tree

7 files changed

+95
-165
lines changed

7 files changed

+95
-165
lines changed

llvm/lib/Target/AMDGPU/AMDGPUGISel.td

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -51,18 +51,6 @@ def gi_vop3pmodsdot :
5151
GIComplexOperandMatcher<s32, "selectVOP3PModsDOT">,
5252
GIComplexPatternEquiv<VOP3PModsDOT>;
5353

54-
def gi_vop3pmodsneg :
55-
GIComplexOperandMatcher<s32, "selectVOP3PModsNeg">,
56-
GIComplexPatternEquiv<VOP3PModsNeg>;
57-
58-
def gi_vop3pmodsnegs :
59-
GIComplexOperandMatcher<s32, "selectVOP3PModsNegs">,
60-
GIComplexPatternEquiv<VOP3PModsNegs>;
61-
62-
def gi_dotiuvop3pmodsnegabs :
63-
GIComplexOperandMatcher<s32, "selectVOP3PModsNegAbs">,
64-
GIComplexPatternEquiv<VOP3PModsNegAbs>;
65-
6654
def gi_wmmaopselvop3pmods :
6755
GIComplexOperandMatcher<s32, "selectWMMAOpSelVOP3PMods">,
6856
GIComplexPatternEquiv<WMMAOpSelVOP3PMods>;
@@ -452,6 +440,13 @@ def gi_fp_pow2_to_exponent : GICustomOperandRenderer<"renderFPPow2ToExponent">,
452440
def gi_as_hw_round_mode : GICustomOperandRenderer<"renderRoundMode">,
453441
GISDNodeXFormEquiv<as_hw_round_mode>;
454442

443+
def gi_VOP3PModsNeg : GICustomOperandRenderer<"renderVOP3PModsNeg">,
444+
GISDNodeXFormEquiv<VOP3PModsNeg>;
445+
def gi_VOP3PModsNegs : GICustomOperandRenderer<"renderVOP3PModsNegs">,
446+
GISDNodeXFormEquiv<VOP3PModsNegs>;
447+
def gi_VOP3PModsNegAbs : GICustomOperandRenderer<"renderVOP3PModsNegAbs">,
448+
GISDNodeXFormEquiv<VOP3PModsNegAbs>;
449+
455450
def gi_prefetch_loc : GICustomOperandRenderer<"renderPrefetchLoc">,
456451
GISDNodeXFormEquiv<PrefetchLoc>;
457452

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp

Lines changed: 0 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -3449,63 +3449,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
34493449
return SelectVOP3PMods(In, Src, SrcMods, true);
34503450
}
34513451

3452-
// Select neg_lo from the i1 immediate operand.
3453-
bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
3454-
const ConstantSDNode *C = cast<ConstantSDNode>(In);
3455-
// Literal i1 value set in intrinsic, represents SrcMods for the next operand.
3456-
// 1 promotes packed values to signed, 0 treats them as unsigned.
3457-
assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
3458-
3459-
unsigned Mods = SISrcMods::OP_SEL_1;
3460-
unsigned SrcSign = C->getZExtValue();
3461-
if (SrcSign == 1)
3462-
Mods ^= SISrcMods::NEG;
3463-
3464-
Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3465-
return true;
3466-
}
3467-
3468-
// Select both neg_lo and neg_hi from the i1 immediate operand. This is
3469-
// specifically for F16/BF16 operands in WMMA instructions, where neg_lo applies
3470-
// to matrix's even k elements, and neg_hi applies to matrix's odd k elements.
3471-
bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegs(SDValue In, SDValue &Src) const {
3472-
const ConstantSDNode *C = cast<ConstantSDNode>(In);
3473-
// Literal i1 value set in intrinsic, represents SrcMods for the next operand.
3474-
// 1 promotes packed values to signed, 0 treats them as unsigned.
3475-
assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
3476-
3477-
unsigned Mods = SISrcMods::OP_SEL_1;
3478-
unsigned SrcSign = C->getZExtValue();
3479-
if (SrcSign == 1)
3480-
Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
3481-
3482-
Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3483-
return true;
3484-
}
3485-
3486-
// Select neg, abs, or both neg and abs from the i16 immediate operand.
3487-
bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegAbs(SDValue In, SDValue &Src) const {
3488-
const ConstantSDNode *C = cast<ConstantSDNode>(In);
3489-
unsigned Mods = SISrcMods::OP_SEL_1;
3490-
unsigned SrcMod = C->getZExtValue();
3491-
switch (SrcMod) {
3492-
default: // Any other value will be silently ignored (considered as 0).
3493-
break;
3494-
case 1:
3495-
Mods ^= SISrcMods::NEG;
3496-
break;
3497-
case 2:
3498-
Mods ^= SISrcMods::ABS;
3499-
break;
3500-
case 3:
3501-
Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
3502-
break;
3503-
}
3504-
3505-
Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3506-
return true;
3507-
}
3508-
35093452
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
35103453
SDValue &Src) const {
35113454
const ConstantSDNode *C = cast<ConstantSDNode>(In);

llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -241,9 +241,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
241241
bool IsDOT = false) const;
242242
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
243243

244-
bool SelectVOP3PModsNeg(SDValue In, SDValue &Src) const;
245-
bool SelectVOP3PModsNegs(SDValue In, SDValue &Src) const;
246-
bool SelectVOP3PModsNegAbs(SDValue In, SDValue &Src) const;
247244
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
248245

249246
bool SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 32 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -4988,66 +4988,6 @@ AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
49884988
return selectVOP3PRetHelper(Root, true);
49894989
}
49904990

4991-
// Select neg_lo from the i1 immediate operand.
4992-
InstructionSelector::ComplexRendererFns
4993-
AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {
4994-
// Literal i1 value set in intrinsic, represents SrcMods for the next operand.
4995-
// Value is in Imm operand as i1 sign extended to int64_t.
4996-
// 1(-1) promotes packed values to signed, 0 treats them as unsigned.
4997-
assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
4998-
"expected i1 value");
4999-
unsigned Mods = SISrcMods::OP_SEL_1;
5000-
if (Root.getImm() == -1)
5001-
Mods ^= SISrcMods::NEG;
5002-
return {{
5003-
[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
5004-
}};
5005-
}
5006-
5007-
// Select both neg_lo and neg_hi from the i1 immediate operand. This is
5008-
// specifically for F16/BF16 operands in WMMA instructions, where neg_lo applies
5009-
// to matrix's even k elements, and neg_hi applies to matrix's odd k elements.
5010-
InstructionSelector::ComplexRendererFns
5011-
AMDGPUInstructionSelector::selectVOP3PModsNegs(MachineOperand &Root) const {
5012-
// Literal i1 value set in intrinsic, represents SrcMods for the next operand.
5013-
// Value is in Imm operand as i1 sign extended to int64_t.
5014-
// 1(-1) promotes packed values to signed, 0 treats them as unsigned.
5015-
assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
5016-
"expected i1 value");
5017-
unsigned Mods = SISrcMods::OP_SEL_1;
5018-
if (Root.getImm() == -1)
5019-
Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
5020-
return {{
5021-
[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
5022-
}};
5023-
}
5024-
5025-
// Select neg, abs, or both neg and abs from the i16 immediate operand.
5026-
InstructionSelector::ComplexRendererFns
5027-
AMDGPUInstructionSelector::selectVOP3PModsNegAbs(MachineOperand &Root) const {
5028-
5029-
assert(Root.isImm() && "Modifier for C must be an immediate");
5030-
5031-
unsigned Mods = SISrcMods::OP_SEL_1;
5032-
switch (Root.getImm()) {
5033-
default: // Any other value will be silently ignored (considered as 0).
5034-
break;
5035-
case 1:
5036-
Mods ^= SISrcMods::NEG;
5037-
break;
5038-
case 2:
5039-
Mods ^= SISrcMods::ABS;
5040-
break;
5041-
case 3:
5042-
Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
5043-
break;
5044-
}
5045-
5046-
return {{
5047-
[=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
5048-
}};
5049-
}
5050-
50514991
InstructionSelector::ComplexRendererFns
50524992
AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
50534993
MachineOperand &Root) const {
@@ -7102,6 +7042,38 @@ void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
71027042
MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);
71037043
}
71047044

7045+
void AMDGPUInstructionSelector::renderVOP3PModsNeg(MachineInstrBuilder &MIB,
7046+
const MachineInstr &MI,
7047+
int OpIdx) const {
7048+
unsigned Mods = SISrcMods::OP_SEL_1;
7049+
if (MI.getOperand(OpIdx).getImm())
7050+
Mods ^= SISrcMods::NEG;
7051+
MIB.addImm((int64_t)Mods);
7052+
}
7053+
7054+
void AMDGPUInstructionSelector::renderVOP3PModsNegs(MachineInstrBuilder &MIB,
7055+
const MachineInstr &MI,
7056+
int OpIdx) const {
7057+
unsigned Mods = SISrcMods::OP_SEL_1;
7058+
if (MI.getOperand(OpIdx).getImm())
7059+
Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
7060+
MIB.addImm((int64_t)Mods);
7061+
}
7062+
7063+
void AMDGPUInstructionSelector::renderVOP3PModsNegAbs(MachineInstrBuilder &MIB,
7064+
const MachineInstr &MI,
7065+
int OpIdx) const {
7066+
unsigned Val = MI.getOperand(OpIdx).getImm();
7067+
unsigned Mods = SISrcMods::OP_SEL_1; // default: none
7068+
if (Val == 1) // neg
7069+
Mods ^= SISrcMods::NEG;
7070+
if (Val == 2) // abs
7071+
Mods ^= SISrcMods::ABS;
7072+
if (Val == 3) // neg and abs
7073+
Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
7074+
MIB.addImm((int64_t)Mods);
7075+
}
7076+
71057077
void AMDGPUInstructionSelector::renderPrefetchLoc(MachineInstrBuilder &MIB,
71067078
const MachineInstr &MI,
71077079
int OpIdx) const {

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -199,13 +199,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
199199
InstructionSelector::ComplexRendererFns
200200
selectVOP3PModsDOT(MachineOperand &Root) const;
201201

202-
InstructionSelector::ComplexRendererFns
203-
selectVOP3PModsNeg(MachineOperand &Root) const;
204-
InstructionSelector::ComplexRendererFns
205-
selectVOP3PModsNegs(MachineOperand &Root) const;
206-
InstructionSelector::ComplexRendererFns
207-
selectVOP3PModsNegAbs(MachineOperand &Root) const;
208-
209202
InstructionSelector::ComplexRendererFns
210203
selectWMMAOpSelVOP3PMods(MachineOperand &Root) const;
211204

@@ -419,6 +412,13 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
419412
void renderRoundMode(MachineInstrBuilder &MIB, const MachineInstr &MI,
420413
int OpIdx) const;
421414

415+
void renderVOP3PModsNeg(MachineInstrBuilder &MIB, const MachineInstr &MI,
416+
int OpIdx) const;
417+
void renderVOP3PModsNegs(MachineInstrBuilder &MIB, const MachineInstr &MI,
418+
int OpIdx) const;
419+
void renderVOP3PModsNegAbs(MachineInstrBuilder &MIB, const MachineInstr &MI,
420+
int OpIdx) const;
421+
422422
void renderPrefetchLoc(MachineInstrBuilder &MIB, const MachineInstr &MI,
423423
int OpIdx) const;
424424

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,32 @@ def SupportedRoundMode : TImmLeaf<i32, [{
908908
Imm == (int)RoundingMode::TowardNegative;
909909
}]>;
910910

911+
def VOP3PModsNeg : SDNodeXForm<timm, [{
912+
unsigned Mods = SISrcMods::OP_SEL_1;
913+
if (N->getZExtValue())
914+
Mods ^= SISrcMods::NEG;
915+
return CurDAG->getTargetConstant(Mods, SDLoc(N), MVT::i32);
916+
}]>;
917+
918+
def VOP3PModsNegs : SDNodeXForm<timm, [{
919+
unsigned Mods = SISrcMods::OP_SEL_1;
920+
if (N->getZExtValue())
921+
Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
922+
return CurDAG->getTargetConstant(Mods, SDLoc(N), MVT::i32);
923+
}]>;
924+
925+
def VOP3PModsNegAbs : SDNodeXForm<timm, [{
926+
unsigned Val = N->getZExtValue();
927+
unsigned Mods = SISrcMods::OP_SEL_1; // default: none
928+
if (Val == 1) // neg
929+
Mods ^= SISrcMods::NEG;
930+
if (Val == 2) // abs
931+
Mods ^= SISrcMods::ABS;
932+
if (Val == 3) // neg and abs
933+
Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
934+
return CurDAG->getTargetConstant(Mods, SDLoc(N), MVT::i32);
935+
}]>;
936+
911937
class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
912938
uint64_t Imm = N->getZExtValue();
913939
unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
@@ -1653,9 +1679,6 @@ def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
16531679
def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
16541680

16551681
def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
1656-
def VOP3PModsNeg : ComplexPattern<untyped, 1, "SelectVOP3PModsNeg">;
1657-
def VOP3PModsNegs : ComplexPattern<untyped, 1, "SelectVOP3PModsNegs">; // chfang: not use complex pattern?
1658-
def VOP3PModsNegAbs : ComplexPattern<untyped, 1, "SelectVOP3PModsNegAbs">;
16591682
def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;
16601683

16611684
def WMMAModsF32NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF32NegAbs">;

llvm/lib/Target/AMDGPU/VOP3PInstructions.td

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -557,11 +557,11 @@ multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
557557
null_frag, 1>;
558558
// Dot-iu instructions consider input as signed if imod neg bits are set. Thus
559559
// Dot-iu Intrinsics have extra operands and require separate codegen pattern.
560-
def : GCNPat < (intrinsic_node (VOP3PModsNeg i32:$src0_mods), i32:$src0,
561-
(VOP3PModsNeg i32:$src1_mods), i32:$src1,
560+
def : GCNPat < (intrinsic_node timm:$src0_mods, i32:$src0,
561+
timm:$src1_mods, i32:$src1,
562562
i32:$src2, (i1 timm:$clamp)),
563-
(!cast<Instruction>(NAME) $src0_mods, i32:$src0,
564-
$src1_mods, i32:$src1,
563+
(!cast<Instruction>(NAME) (VOP3PModsNeg $src0_mods), i32:$src0,
564+
(VOP3PModsNeg $src1_mods), i32:$src1,
565565
(i32 8), i32:$src2, i1:$clamp)
566566
>;
567567
}
@@ -1302,11 +1302,11 @@ class WMMAOpSelPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
13021302

13031303
class WMMAUIClampPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
13041304
GCNPat < (P.DstVT (node
1305-
(VOP3PModsNeg i32:$src0_modifiers), (P.Src0VT P.Src0VT:$src0),
1306-
(VOP3PModsNeg i32:$src1_modifiers), (P.Src1VT P.Src1VT:$src1),
1305+
timm:$src0_modifiers, (P.Src0VT P.Src0VT:$src0),
1306+
timm:$src1_modifiers, (P.Src1VT P.Src1VT:$src1),
13071307
(P.Src2VT P.Src2VT:$src2), (i1 timm:$clamp)
13081308
)),
1309-
(P.DstVT (Inst i32:$src0_modifiers, P.Src0VT:$src0, i32:$src1_modifiers, P.Src1VT:$src1, (i32 8), P.Src2VT:$src2, i1:$clamp))
1309+
(P.DstVT (Inst (VOP3PModsNeg $src0_modifiers), P.Src0VT:$src0, (VOP3PModsNeg $src1_modifiers), P.Src1VT:$src1, (i32 8), P.Src2VT:$src2, i1:$clamp))
13101310
>;
13111311

13121312
class WMMAOpcodeMapping<Instruction TwoAddr, Instruction ThreeAddr> {
@@ -1563,44 +1563,44 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
15631563
bit IsAB_F16_IMod0 = !and(IsAB_F16, !not(HasIModOp));
15641564
bit IsAB_F32F64_IMod1 = !and(!or(IsAB_F64, IsAB_F32), HasIModOp);
15651565
bit IsAB_F16BF16_IMod1 = !and(!or(IsAB_F16, IsAB_BF16), HasIModOp);
1566-
dag Src0InPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
1567-
IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs i32:$src0_modifiers), Src0VT:$src0),
1566+
dag Src0InPat = !cond(IsAB_F32F64_IMod1 : (ins timm:$src0_modifiers, Src0VT:$src0),
1567+
IsAB_F16BF16_IMod1 : (ins timm:$src0_modifiers, Src0VT:$src0),
15681568
IsAB_F16_IMod0 : (ins (Src0VT (WMMAModsF16Neg Src0VT:$src0, i32:$src0_modifiers))),
15691569
IsAB_BF16_IMod0 : (ins Src0VT:$src0),
1570-
IsIU : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
1570+
IsIU : (ins timm:$src0_modifiers, Src0VT:$src0),
15711571
HasMatrixFMT : (ins timm:$matrix_a_fmt, Src0VT:$src0),
15721572
NoABMods : (ins Src0VT:$src0));
1573-
dag Src0OutPat = !cond(IsAB_F32F64_IMod1 : (ins i32:$src0_modifiers, Src0VT:$src0),
1574-
IsAB_F16BF16_IMod1 : (ins i32:$src0_modifiers, Src0VT:$src0),
1573+
dag Src0OutPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg $src0_modifiers), Src0VT:$src0),
1574+
IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs $src0_modifiers), Src0VT:$src0),
15751575
IsAB_F16_IMod0 : (ins i32:$src0_modifiers, Src0VT:$src0),
15761576
IsAB_BF16_IMod0 : (ins (i32 8), Src0VT:$src0),
1577-
IsIU : (ins i32:$src0_modifiers, Src0VT:$src0),
1577+
IsIU : (ins (VOP3PModsNeg $src0_modifiers), Src0VT:$src0),
15781578
NoABMods : (ins Src0VT:$src0));
1579-
dag Src1InPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
1580-
IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs i32:$src1_modifiers), Src1VT:$src1),
1579+
dag Src1InPat = !cond(IsAB_F32F64_IMod1 : (ins timm:$src1_modifiers, Src1VT:$src1),
1580+
IsAB_F16BF16_IMod1 : (ins timm:$src1_modifiers, Src1VT:$src1),
15811581
IsAB_F16_IMod0 : (ins (Src1VT (WMMAModsF16Neg Src1VT:$src1, i32:$src1_modifiers))),
15821582
IsAB_BF16_IMod0 : (ins Src1VT:$src1),
1583-
IsIU : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
1583+
IsIU : (ins timm:$src1_modifiers, Src1VT:$src1),
15841584
HasMatrixFMT : (ins timm:$matrix_b_fmt, Src1VT:$src1),
15851585
NoABMods : (ins Src1VT:$src1));
1586-
dag Src1OutPat = !cond(IsAB_F32F64_IMod1 : (ins i32:$src1_modifiers, Src1VT:$src1),
1587-
IsAB_F16BF16_IMod1 : (ins i32:$src1_modifiers, Src1VT:$src1),
1586+
dag Src1OutPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg $src1_modifiers), Src1VT:$src1),
1587+
IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs $src1_modifiers), Src1VT:$src1),
15881588
IsAB_F16_IMod0 : (ins i32:$src1_modifiers, Src1VT:$src1),
15891589
IsAB_BF16_IMod0 : (ins (i32 8), Src1VT:$src1),
1590-
IsIU : (ins i32:$src1_modifiers, Src1VT:$src1),
1590+
IsIU : (ins (VOP3PModsNeg $src1_modifiers), Src1VT:$src1),
15911591
NoABMods : (ins Src1VT:$src1));
15921592
bit IsC_IMod1 = !and(HasIModOp, IsWMMA, !not(IsIU), !not(IsXF32));
15931593
bit IsC_F32_IMod0 = !and(IsC_F32, !not(HasIModOp));
15941594
bit IsC_F16_IMod0 = !and(IsC_F16, !not(HasIModOp));
15951595
bit IsC_BF16_IMod0 = !and(IsC_BF16, !not(HasIModOp));
15961596
bit IsIUXF32 = !or(IsIU, IsXF32);
1597-
dag Src2InPatWmma = !cond(IsC_IMod1 : (ins (VOP3PModsNegAbs i32:$src2_modifiers), Src2VT:$src2),
1597+
dag Src2InPatWmma = !cond(IsC_IMod1 : (ins timm:$src2_modifiers, Src2VT:$src2),
15981598
IsC_F32_IMod0 : (ins (Src2VT (WMMAModsF32NegAbs Src2VT:$src2, i32:$src2_modifiers))),
15991599
IsC_F16_IMod0 : (ins (Src2VT (WMMAModsF16NegAbs Src2VT:$src2, i32:$src2_modifiers))),
16001600
IsC_BF16_IMod0 : (ins Src2VT:$src2),
16011601
IsIUXF32 : (ins Src2VT:$src2),
16021602
IsSWMMAC : (ins));
1603-
dag Src2OutPatWmma = !cond(IsC_IMod1 : (ins i32:$src2_modifiers, Src2VT:$src2),
1603+
dag Src2OutPatWmma = !cond(IsC_IMod1 : (ins (VOP3PModsNegAbs $src2_modifiers), Src2VT:$src2),
16041604
IsC_F32_IMod0 : (ins i32:$src2_modifiers, Src2VT:$src2),
16051605
IsC_F16_IMod0 : (ins i32:$src2_modifiers, Src2VT:$src2),
16061606
IsC_BF16_IMod0 : (ins (i32 8), Src2VT:$src2),
@@ -1616,8 +1616,8 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
16161616
!eq(IndexType, 16): (ins i32:$src2, i32:$index_key_16bit),
16171617
!eq(IndexType, 32): (ins i64:$src2, i32:$index_key_32bit));
16181618
dag MatrixFMTOutPat = !if(HasMatrixFMT, (ins i32:$matrix_a_fmt, i32:$matrix_b_fmt), (ins));
1619-
dag Src2InlineInPat = !con(!if(IsC_IMod1, (ins (VOP3PModsNegAbs i32:$src2_modifiers)), (ins)), (ins (Src2VT (WMMAVISrc Src2VT:$src2))));
1620-
dag Src2InlineOutPat = !con(!if(IsIUXF32, (ins), !if(IsC_IMod1, (ins i32:$src2_modifiers), (ins (i32 8)))), (ins Src2VT:$src2));
1619+
dag Src2InlineInPat = !con(!if(IsC_IMod1, (ins timm:$src2_modifiers), (ins)), (ins (Src2VT (WMMAVISrc Src2VT:$src2))));
1620+
dag Src2InlineOutPat = !con(!if(IsIUXF32, (ins), !if(IsC_IMod1, (ins (VOP3PModsNegAbs $src2_modifiers)), (ins (i32 8)))), (ins Src2VT:$src2));
16211621
dag MatrixScaleInPat = !if(HasMatrixScale, (ins timm:$matrix_a_scale, timm:$matrix_a_scale_fmt, ScaleTy:$scale_src0,
16221622
timm:$matrix_b_scale, timm:$matrix_b_scale_fmt, ScaleTy:$scale_src1),
16231623
(ins));

0 commit comments

Comments
 (0)