-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[AMDGPU] Use SDNodeXForm to select a few VOP3P modifiers, NFC #151907
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
It is not necessary to use ComplexPattern to select VOP3PModsNeg, VOP3PModsNegs and VOP3PModsNegAbs. We can use SDNodeXForm instead.
@llvm/pr-subscribers-backend-amdgpu
Author: Changpeng Fang (changpeng)
Changes: It is not necessary to use ComplexPattern to select VOP3PModsNeg, VOP3PModsNegs and VOP3PModsNegAbs. We can use SDNodeXForm instead.
Patch is 20.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151907.diff
7 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 992572f17e5b9..394a143dd3086 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -51,18 +51,6 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher<s32, "selectVOP3PModsDOT">,
GIComplexPatternEquiv<VOP3PModsDOT>;
-def gi_vop3pmodsneg :
- GIComplexOperandMatcher<s32, "selectVOP3PModsNeg">,
- GIComplexPatternEquiv<VOP3PModsNeg>;
-
-def gi_vop3pmodsnegs :
- GIComplexOperandMatcher<s32, "selectVOP3PModsNegs">,
- GIComplexPatternEquiv<VOP3PModsNegs>;
-
-def gi_dotiuvop3pmodsnegabs :
- GIComplexOperandMatcher<s32, "selectVOP3PModsNegAbs">,
- GIComplexPatternEquiv<VOP3PModsNegAbs>;
-
def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher<s32, "selectWMMAOpSelVOP3PMods">,
GIComplexPatternEquiv<WMMAOpSelVOP3PMods>;
@@ -452,6 +440,13 @@ def gi_fp_pow2_to_exponent : GICustomOperandRenderer<"renderFPPow2ToExponent">,
def gi_as_hw_round_mode : GICustomOperandRenderer<"renderRoundMode">,
GISDNodeXFormEquiv<as_hw_round_mode>;
+def gi_VOP3PModsNeg : GICustomOperandRenderer<"renderVOP3PModsNeg">,
+ GISDNodeXFormEquiv<VOP3PModsNeg>;
+def gi_VOP3PModsNegs : GICustomOperandRenderer<"renderVOP3PModsNegs">,
+ GISDNodeXFormEquiv<VOP3PModsNegs>;
+def gi_VOP3PModsNegAbs : GICustomOperandRenderer<"renderVOP3PModsNegAbs">,
+ GISDNodeXFormEquiv<VOP3PModsNegAbs>;
+
def gi_prefetch_loc : GICustomOperandRenderer<"renderPrefetchLoc">,
GISDNodeXFormEquiv<PrefetchLoc>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 39b42002b907a..fb83388e5e265 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -3449,63 +3449,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
-// Select neg_lo from the i1 immediate operand.
-bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
- const ConstantSDNode *C = cast<ConstantSDNode>(In);
- // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
- // 1 promotes packed values to signed, 0 treats them as unsigned.
- assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
-
- unsigned Mods = SISrcMods::OP_SEL_1;
- unsigned SrcSign = C->getZExtValue();
- if (SrcSign == 1)
- Mods ^= SISrcMods::NEG;
-
- Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
- return true;
-}
-
-// Select both neg_lo and neg_hi from the i1 immediate operand. This is
-// specifically for F16/BF16 operands in WMMA instructions, where neg_lo applies
-// to matrix's even k elements, and neg_hi applies to matrix's odd k elements.
-bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegs(SDValue In, SDValue &Src) const {
- const ConstantSDNode *C = cast<ConstantSDNode>(In);
- // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
- // 1 promotes packed values to signed, 0 treats them as unsigned.
- assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
-
- unsigned Mods = SISrcMods::OP_SEL_1;
- unsigned SrcSign = C->getZExtValue();
- if (SrcSign == 1)
- Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
-
- Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
- return true;
-}
-
-// Select neg, abs, or both neg and abs from the i16 immediate operans.
-bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegAbs(SDValue In, SDValue &Src) const {
- const ConstantSDNode *C = cast<ConstantSDNode>(In);
- unsigned Mods = SISrcMods::OP_SEL_1;
- unsigned SrcMod = C->getZExtValue();
- switch (SrcMod) {
- default: // Any other value will be silently ignored (considered as 0).
- break;
- case 1:
- Mods ^= SISrcMods::NEG;
- break;
- case 2:
- Mods ^= SISrcMods::ABS;
- break;
- case 3:
- Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
- break;
- }
-
- Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
- return true;
-}
-
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast<ConstantSDNode>(In);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 983f1aa8fab86..16388e750026c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -241,9 +241,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;
- bool SelectVOP3PModsNeg(SDValue In, SDValue &Src) const;
- bool SelectVOP3PModsNegs(SDValue In, SDValue &Src) const;
- bool SelectVOP3PModsNegAbs(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;
bool SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b0d3b12471a38..212f4872303de 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4988,66 +4988,6 @@ AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
return selectVOP3PRetHelper(Root, true);
}
-// Select neg_lo from the i1 immediate operand.
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {
- // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
- // Value is in Imm operand as i1 sign extended to int64_t.
- // 1(-1) promotes packed values to signed, 0 treats them as unsigned.
- assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
- "expected i1 value");
- unsigned Mods = SISrcMods::OP_SEL_1;
- if (Root.getImm() == -1)
- Mods ^= SISrcMods::NEG;
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
- }};
-}
-
-// Select both neg_lo and neg_hi from the i1 immediate operand. This is
-// specifically for F16/BF16 operands in WMMA instructions, where neg_lo applies
-// to matrix's even k elements, and neg_hi applies to matrix's odd k elements.
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectVOP3PModsNegs(MachineOperand &Root) const {
- // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
- // Value is in Imm operand as i1 sign extended to int64_t.
- // 1(-1) promotes packed values to signed, 0 treats them as unsigned.
- assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
- "expected i1 value");
- unsigned Mods = SISrcMods::OP_SEL_1;
- if (Root.getImm() == -1)
- Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
- }};
-}
-
-// Select neg, abs, or both neg and abs from the i16 immediate operans.
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectVOP3PModsNegAbs(MachineOperand &Root) const {
-
- assert(Root.isImm() && "Modifier for C must be an immediate");
-
- unsigned Mods = SISrcMods::OP_SEL_1;
- switch (Root.getImm()) {
- default: // Any other value will be silently ignored (considered as 0).
- break;
- case 1:
- Mods ^= SISrcMods::NEG;
- break;
- case 2:
- Mods ^= SISrcMods::ABS;
- break;
- case 3:
- Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
- break;
- }
-
- return {{
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
- }};
-}
-
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
MachineOperand &Root) const {
@@ -7102,6 +7042,38 @@ void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);
}
+void AMDGPUInstructionSelector::renderVOP3PModsNeg(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (MI.getOperand(OpIdx).getImm())
+ Mods ^= SISrcMods::NEG;
+ MIB.addImm((int64_t)Mods);
+}
+
+void AMDGPUInstructionSelector::renderVOP3PModsNegs(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (MI.getOperand(OpIdx).getImm())
+ Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
+ MIB.addImm((int64_t)Mods);
+}
+
+void AMDGPUInstructionSelector::renderVOP3PModsNegAbs(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ unsigned Val = MI.getOperand(OpIdx).getImm();
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Val == 1)
+ Mods ^= SISrcMods::NEG;
+ if (Val == 2)
+ Mods ^= SISrcMods::ABS;
+ if (Val == 3)
+ Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
+ MIB.addImm((int64_t)Mods);
+}
+
void AMDGPUInstructionSelector::renderPrefetchLoc(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 140e753bf976a..d0b5dc5e11e39 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -199,13 +199,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
InstructionSelector::ComplexRendererFns
selectVOP3PModsDOT(MachineOperand &Root) const;
- InstructionSelector::ComplexRendererFns
- selectVOP3PModsNeg(MachineOperand &Root) const;
- InstructionSelector::ComplexRendererFns
- selectVOP3PModsNegs(MachineOperand &Root) const;
- InstructionSelector::ComplexRendererFns
- selectVOP3PModsNegAbs(MachineOperand &Root) const;
-
InstructionSelector::ComplexRendererFns
selectWMMAOpSelVOP3PMods(MachineOperand &Root) const;
@@ -419,6 +412,13 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
void renderRoundMode(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
+ void renderVOP3PModsNeg(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderVOP3PModsNegs(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+ void renderVOP3PModsNegAbs(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+
void renderPrefetchLoc(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 4698a5805ee0c..4380ff1bf33d4 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -908,6 +908,32 @@ def SupportedRoundMode : TImmLeaf<i32, [{
Imm == (int)RoundingMode::TowardNegative;
}]>;
+def VOP3PModsNeg : SDNodeXForm<timm, [{
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (N->getZExtValue())
+ Mods ^= SISrcMods::NEG;
+ return CurDAG->getTargetConstant(Mods, SDLoc(N), MVT::i32);
+}]>;
+
+def VOP3PModsNegs : SDNodeXForm<timm, [{
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (N->getZExtValue())
+ Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
+ return CurDAG->getTargetConstant(Mods, SDLoc(N), MVT::i32);
+}]>;
+
+def VOP3PModsNegAbs : SDNodeXForm<timm, [{
+ unsigned Val = N->getZExtValue();
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ if (Val == 1)
+ Mods ^= SISrcMods::NEG;
+ if (Val == 2)
+ Mods ^= SISrcMods::ABS;
+ if (Val == 3)
+ Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
+ return CurDAG->getTargetConstant(Mods, SDLoc(N), MVT::i32);
+}]>;
+
class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
uint64_t Imm = N->getZExtValue();
unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
@@ -1647,9 +1673,6 @@ def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;
def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
-def VOP3PModsNeg : ComplexPattern<untyped, 1, "SelectVOP3PModsNeg">;
-def VOP3PModsNegs : ComplexPattern<untyped, 1, "SelectVOP3PModsNegs">; // chfang: not use complex pattern?
-def VOP3PModsNegAbs : ComplexPattern<untyped, 1, "SelectVOP3PModsNegAbs">;
def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;
def WMMAModsF32NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF32NegAbs">;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 95fcd4ac1c101..9cc68c91369a6 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -557,11 +557,11 @@ multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
null_frag, 1>;
// Dot-iu instructions consider input as signed if imod neg bits are set. Thus
// Dot-iu Intrinsics have extra operands and require separate codegen pattern.
- def : GCNPat < (intrinsic_node (VOP3PModsNeg i32:$src0_mods), i32:$src0,
- (VOP3PModsNeg i32:$src1_mods), i32:$src1,
+ def : GCNPat < (intrinsic_node timm:$src0_mods, i32:$src0,
+ timm:$src1_mods, i32:$src1,
i32:$src2, (i1 timm:$clamp)),
- (!cast<Instruction>(NAME) $src0_mods, i32:$src0,
- $src1_mods, i32:$src1,
+ (!cast<Instruction>(NAME) (VOP3PModsNeg $src0_mods), i32:$src0,
+ (VOP3PModsNeg $src1_mods), i32:$src1,
(i32 8), i32:$src2, i1:$clamp)
>;
}
@@ -1302,11 +1302,11 @@ class WMMAOpSelPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
class WMMAUIClampPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
GCNPat < (P.DstVT (node
- (VOP3PModsNeg i32:$src0_modifiers), (P.Src0VT P.Src0VT:$src0),
- (VOP3PModsNeg i32:$src1_modifiers), (P.Src1VT P.Src1VT:$src1),
+ timm:$src0_modifiers, (P.Src0VT P.Src0VT:$src0),
+ timm:$src1_modifiers, (P.Src1VT P.Src1VT:$src1),
(P.Src2VT P.Src2VT:$src2), (i1 timm:$clamp)
)),
- (P.DstVT (Inst i32:$src0_modifiers, P.Src0VT:$src0, i32:$src1_modifiers, P.Src1VT:$src1, (i32 8), P.Src2VT:$src2, i1:$clamp))
+ (P.DstVT (Inst (VOP3PModsNeg $src0_modifiers), P.Src0VT:$src0, (VOP3PModsNeg $src1_modifiers), P.Src1VT:$src1, (i32 8), P.Src2VT:$src2, i1:$clamp))
>;
class WMMAOpcodeMapping<Instruction TwoAddr, Instruction ThreeAddr> {
@@ -1551,44 +1551,44 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
bit IsAB_F16_IMod0 = !and(IsAB_F16, !not(HasIModOp));
bit IsAB_F32F64_IMod1 = !and(!or(IsAB_F64, IsAB_F32), HasIModOp);
bit IsAB_F16BF16_IMod1 = !and(!or(IsAB_F16, IsAB_BF16), HasIModOp);
- dag Src0InPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
- IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs i32:$src0_modifiers), Src0VT:$src0),
+ dag Src0InPat = !cond(IsAB_F32F64_IMod1 : (ins timm:$src0_modifiers, Src0VT:$src0),
+ IsAB_F16BF16_IMod1 : (ins timm:$src0_modifiers, Src0VT:$src0),
IsAB_F16_IMod0 : (ins (Src0VT (WMMAModsF16Neg Src0VT:$src0, i32:$src0_modifiers))),
IsAB_BF16_IMod0 : (ins Src0VT:$src0),
- IsIU : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
+ IsIU : (ins timm:$src0_modifiers, Src0VT:$src0),
HasMatrixFMT : (ins timm:$matrix_a_fmt, Src0VT:$src0),
NoABMods : (ins Src0VT:$src0));
- dag Src0OutPat = !cond(IsAB_F32F64_IMod1 : (ins i32:$src0_modifiers, Src0VT:$src0),
- IsAB_F16BF16_IMod1 : (ins i32:$src0_modifiers, Src0VT:$src0),
+ dag Src0OutPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg $src0_modifiers), Src0VT:$src0),
+ IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs $src0_modifiers), Src0VT:$src0),
IsAB_F16_IMod0 : (ins i32:$src0_modifiers, Src0VT:$src0),
IsAB_BF16_IMod0 : (ins (i32 8), Src0VT:$src0),
- IsIU : (ins i32:$src0_modifiers, Src0VT:$src0),
+ IsIU : (ins (VOP3PModsNeg $src0_modifiers), Src0VT:$src0),
NoABMods : (ins Src0VT:$src0));
- dag Src1InPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
- IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs i32:$src1_modifiers), Src1VT:$src1),
+ dag Src1InPat = !cond(IsAB_F32F64_IMod1 : (ins timm:$src1_modifiers, Src1VT:$src1),
+ IsAB_F16BF16_IMod1 : (ins timm:$src1_modifiers, Src1VT:$src1),
IsAB_F16_IMod0 : (ins (Src1VT (WMMAModsF16Neg Src1VT:$src1, i32:$src1_modifiers))),
IsAB_BF16_IMod0 : (ins Src1VT:$src1),
- IsIU : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
+ IsIU : (ins timm:$src1_modifiers, Src1VT:$src1),
HasMatrixFMT : (ins timm:$matrix_b_fmt, Src1VT:$src1),
NoABMods : (ins Src1VT:$src1));
- dag Src1OutPat = !cond(IsAB_F32F64_IMod1 : (ins i32:$src1_modifiers, Src1VT:$src1),
- IsAB_F16BF16_IMod1 : (ins i32:$src1_modifiers, Src1VT:$src1),
+ dag Src1OutPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg $src1_modifiers), Src1VT:$src1),
+ IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs $src1_modifiers), Src1VT:$src1),
IsAB_F16_IMod0 : (ins i32:$src1_modifiers, Src1VT:$src1),
IsAB_BF16_IMod0 : (ins (i32 8), Src1VT:$src1),
- IsIU : (ins i32:$src1_modifiers, Src1VT:$src1),
+ IsIU : (ins (VOP3PModsNeg $src1_modifiers), Src1VT:$src1),
NoABMods : (ins Src1VT:$src1));
bit IsC_IMod1 = !and(HasIModOp, IsWMMA, !not(IsIU), !not(IsXF32));
bit IsC_F32_IMod0 = !and(IsC_F32, !not(HasIModOp));
bit IsC_F16_IMod0 = !and(IsC_F16, !not(HasIModOp));
bit IsC_BF16_IMod0 = !and(IsC_BF16, !not(HasIModOp));
bit IsIUXF32 = !or(IsIU, IsXF32);
- dag Src2InPatWmma = !cond(IsC_IMod1 : (ins (VOP3PModsNegAbs i32:$src2_modifiers), Src2VT:$src2),
+ dag Src2InPatWmma = !cond(IsC_IMod1 : (ins timm:$src2_modifiers, Src2VT:$src2),
IsC_F32_IMod0 : (ins (Src2VT (WMMAModsF32NegAbs Src2VT:$src2, i32:$src2_modifiers))),
IsC_F16_IMod0 : (ins (Src2VT (WMMAModsF16NegAbs Src2VT:$src2, i32:$src2_modifiers))),
IsC_BF16_IMod0 : (ins Src2VT:$src2),
IsIUXF32 : (ins Src2VT:$src2),
IsSWMMAC : (ins));
- dag Src2OutPatWmma = !cond(IsC_IMod1 : (ins i32:$src2_modifiers, Src2VT:$src2),
+ dag Src2OutPatWmma = !cond(IsC_IMod1 : (ins (VOP3PModsNegAbs $src2_modifiers), Src2VT:$src2),
IsC_F32_IMod0 : (ins i32:$src2_modifiers, Src2VT:$src2),
IsC_F16_IMod0 : (ins i32:$src2_modifiers, Src2VT:$src2),
IsC_BF16_IMod0 : (ins (i32 8), Src2VT:$src2),
@@ -1604,8 +1604,8 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
...
[truncated]
|
You can test this locally with the following command:
git-clang-format --diff HEAD~1 HEAD --extensions h,cpp -- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
View the diff from clang-format here.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b7fd131e7..6c1b02ac0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -7065,7 +7065,7 @@ void AMDGPUInstructionSelector::renderVOP3PModsNegAbs(MachineInstrBuilder &MIB,
int OpIdx) const {
unsigned Val = MI.getOperand(OpIdx).getImm();
unsigned Mods = SISrcMods::OP_SEL_1; // default: none
- if (Val == 1) // neg
+ if (Val == 1) // neg
Mods ^= SISrcMods::NEG;
if (Val == 2) // abs
Mods ^= SISrcMods::ABS;
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks reasonable. Would you mind applying it downstream too?
int OpIdx) const { | ||
unsigned Val = MI.getOperand(OpIdx).getImm(); | ||
unsigned Mods = SISrcMods::OP_SEL_1; | ||
if (Val == 1) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Magic numbers, these are presumably some enum?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Magic numbers, these are presumably some enum?
Yes, SISrcMods::OP_SEL_1 is an enum value, and it is widely used in the TableGen files of the AMDGPU backend.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just realized that you are actually asking about the immediates from the intrinsic. I am not sure whether we should add an enum for them (we should follow the same rule in all intrinsics). Here I added comments to make them less magic. Thanks.
def VOP3PModsNegAbs : SDNodeXForm<timm, [{ | ||
unsigned Val = N->getZExtValue(); | ||
unsigned Mods = SISrcMods::OP_SEL_1; | ||
if (Val == 1) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Magic numbers
It is not necessary to use ComplexPattern to select VOP3PModsNeg, VOP3PModsNegs
and VOP3PModsNegAbs. We can use SDNodeXForm instead.