From 3c1f10d9f750d5374a2f4f7c85a7ce5851ffe46e Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Sun, 3 Aug 2025 23:02:20 -0700 Subject: [PATCH 1/5] [AMDGPU] Use SDNodeXForm to select a few VOP3P modifiers, NFC It is not necessary to use ComplexPattern to select VOP3PModsNeg, VOP3PModsNegs and VOP3PModsNegAbs. We can use SDNodeXForm instead. --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td | 19 ++-- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 57 ------------ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 3 - .../AMDGPU/AMDGPUInstructionSelector.cpp | 92 +++++++------------ .../Target/AMDGPU/AMDGPUInstructionSelector.h | 14 +-- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 29 +++++- llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 46 +++++----- 7 files changed, 95 insertions(+), 165 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td index 992572f17e5b9..394a143dd3086 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -51,18 +51,6 @@ def gi_vop3pmodsdot : GIComplexOperandMatcher, GIComplexPatternEquiv; -def gi_vop3pmodsneg : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - -def gi_vop3pmodsnegs : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - -def gi_dotiuvop3pmodsnegabs : - GIComplexOperandMatcher, - GIComplexPatternEquiv; - def gi_wmmaopselvop3pmods : GIComplexOperandMatcher, GIComplexPatternEquiv; @@ -452,6 +440,13 @@ def gi_fp_pow2_to_exponent : GICustomOperandRenderer<"renderFPPow2ToExponent">, def gi_as_hw_round_mode : GICustomOperandRenderer<"renderRoundMode">, GISDNodeXFormEquiv; +def gi_VOP3PModsNeg : GICustomOperandRenderer<"renderVOP3PModsNeg">, + GISDNodeXFormEquiv; +def gi_VOP3PModsNegs : GICustomOperandRenderer<"renderVOP3PModsNegs">, + GISDNodeXFormEquiv; +def gi_VOP3PModsNegAbs : GICustomOperandRenderer<"renderVOP3PModsNegAbs">, + GISDNodeXFormEquiv; + def gi_prefetch_loc : GICustomOperandRenderer<"renderPrefetchLoc">, GISDNodeXFormEquiv; diff --git 
a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 39b42002b907a..fb83388e5e265 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -3449,63 +3449,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src, return SelectVOP3PMods(In, Src, SrcMods, true); } -// Select neg_lo from the i1 immediate operand. -bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const { - const ConstantSDNode *C = cast<ConstantSDNode>(In); - // Literal i1 value set in intrinsic, represents SrcMods for the next operand. - // 1 promotes packed values to signed, 0 treats them as unsigned. - assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value"); - - unsigned Mods = SISrcMods::OP_SEL_1; - unsigned SrcSign = C->getZExtValue(); - if (SrcSign == 1) - Mods ^= SISrcMods::NEG; - - Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); - return true; -} - -// Select both neg_lo and neg_hi from the i1 immediate operand. This is -// specifically for F16/BF16 operands in WMMA instructions, where neg_lo applies -// to matrix's even k elements, and neg_hi applies to matrix's odd k elements. -bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegs(SDValue In, SDValue &Src) const { - const ConstantSDNode *C = cast<ConstantSDNode>(In); - // Literal i1 value set in intrinsic, represents SrcMods for the next operand. - // 1 promotes packed values to signed, 0 treats them as unsigned. - assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value"); - - unsigned Mods = SISrcMods::OP_SEL_1; - unsigned SrcSign = C->getZExtValue(); - if (SrcSign == 1) - Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); - - Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); - return true; -} - -// Select neg, abs, or both neg and abs from the i16 immediate operans. 
-bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegAbs(SDValue In, SDValue &Src) const { - const ConstantSDNode *C = cast<ConstantSDNode>(In); - unsigned Mods = SISrcMods::OP_SEL_1; - unsigned SrcMod = C->getZExtValue(); - switch (SrcMod) { - default: // Any other value will be silently ignored (considered as 0). - break; - case 1: - Mods ^= SISrcMods::NEG; - break; - case 2: - Mods ^= SISrcMods::ABS; - break; - case 3: - Mods ^= (SISrcMods::NEG | SISrcMods::ABS); - break; - } - - Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); - return true; -} - bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const { const ConstantSDNode *C = cast<ConstantSDNode>(In); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index 983f1aa8fab86..16388e750026c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -241,9 +241,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { bool IsDOT = false) const; bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const; - bool SelectVOP3PModsNeg(SDValue In, SDValue &Src) const; - bool SelectVOP3PModsNegs(SDValue In, SDValue &Src) const; - bool SelectVOP3PModsNegAbs(SDValue In, SDValue &Src) const; bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const; bool SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index b0d3b12471a38..212f4872303de 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -4988,66 +4988,6 @@ AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const { return selectVOP3PRetHelper(Root, true); } -// Select neg_lo from the i1 immediate operand. 
-InstructionSelector::ComplexRendererFns -AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const { - // Literal i1 value set in intrinsic, represents SrcMods for the next operand. - // Value is in Imm operand as i1 sign extended to int64_t. - // 1(-1) promotes packed values to signed, 0 treats them as unsigned. - assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) && - "expected i1 value"); - unsigned Mods = SISrcMods::OP_SEL_1; - if (Root.getImm() == -1) - Mods ^= SISrcMods::NEG; - return {{ - [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods - }}; -} - -// Select both neg_lo and neg_hi from the i1 immediate operand. This is -// specifically for F16/BF16 operands in WMMA instructions, where neg_lo applies -// to matrix's even k elements, and neg_hi applies to matrix's odd k elements. -InstructionSelector::ComplexRendererFns -AMDGPUInstructionSelector::selectVOP3PModsNegs(MachineOperand &Root) const { - // Literal i1 value set in intrinsic, represents SrcMods for the next operand. - // Value is in Imm operand as i1 sign extended to int64_t. - // 1(-1) promotes packed values to signed, 0 treats them as unsigned. - assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) && - "expected i1 value"); - unsigned Mods = SISrcMods::OP_SEL_1; - if (Root.getImm() == -1) - Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); - return {{ - [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods - }}; -} - -// Select neg, abs, or both neg and abs from the i16 immediate operans. -InstructionSelector::ComplexRendererFns -AMDGPUInstructionSelector::selectVOP3PModsNegAbs(MachineOperand &Root) const { - - assert(Root.isImm() && "Modifier for C must be an immediate"); - - unsigned Mods = SISrcMods::OP_SEL_1; - switch (Root.getImm()) { - default: // Any other value will be silently ignored (considered as 0). 
- break; - case 1: - Mods ^= SISrcMods::NEG; - break; - case 2: - Mods ^= SISrcMods::ABS; - break; - case 3: - Mods ^= (SISrcMods::NEG | SISrcMods::ABS); - break; - } - - return {{ - [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods - }}; -} - InstructionSelector::ComplexRendererFns AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods( MachineOperand &Root) const { @@ -7102,6 +7042,38 @@ void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB, MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4); } +void AMDGPUInstructionSelector::renderVOP3PModsNeg(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + unsigned Mods = SISrcMods::OP_SEL_1; + if (MI.getOperand(OpIdx).getImm()) + Mods ^= SISrcMods::NEG; + MIB.addImm((int64_t)Mods); +} + +void AMDGPUInstructionSelector::renderVOP3PModsNegs(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + unsigned Mods = SISrcMods::OP_SEL_1; + if (MI.getOperand(OpIdx).getImm()) + Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); + MIB.addImm((int64_t)Mods); +} + +void AMDGPUInstructionSelector::renderVOP3PModsNegAbs(MachineInstrBuilder &MIB, + const MachineInstr &MI, + int OpIdx) const { + unsigned Val = MI.getOperand(OpIdx).getImm(); + unsigned Mods = SISrcMods::OP_SEL_1; + if (Val == 1) + Mods ^= SISrcMods::NEG; + if (Val == 2) + Mods ^= SISrcMods::ABS; + if (Val == 3) + Mods ^= (SISrcMods::NEG | SISrcMods::ABS); + MIB.addImm((int64_t)Mods); +} + void AMDGPUInstructionSelector::renderPrefetchLoc(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 140e753bf976a..d0b5dc5e11e39 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -199,13 +199,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector { InstructionSelector::ComplexRendererFns 
selectVOP3PModsDOT(MachineOperand &Root) const; - InstructionSelector::ComplexRendererFns - selectVOP3PModsNeg(MachineOperand &Root) const; - InstructionSelector::ComplexRendererFns - selectVOP3PModsNegs(MachineOperand &Root) const; - InstructionSelector::ComplexRendererFns - selectVOP3PModsNegAbs(MachineOperand &Root) const; - InstructionSelector::ComplexRendererFns selectWMMAOpSelVOP3PMods(MachineOperand &Root) const; @@ -419,6 +412,13 @@ class AMDGPUInstructionSelector final : public InstructionSelector { void renderRoundMode(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const; + void renderVOP3PModsNeg(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; + void renderVOP3PModsNegs(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; + void renderVOP3PModsNegAbs(MachineInstrBuilder &MIB, const MachineInstr &MI, + int OpIdx) const; + void renderPrefetchLoc(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 4698a5805ee0c..4380ff1bf33d4 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -908,6 +908,32 @@ def SupportedRoundMode : TImmLeaf; +def VOP3PModsNeg : SDNodeXForm<timm, [{ + unsigned Mods = SISrcMods::OP_SEL_1; + if (N->getZExtValue()) + Mods ^= SISrcMods::NEG; + return CurDAG->getTargetConstant(Mods, SDLoc(N), MVT::i32); +}]>; + +def VOP3PModsNegs : SDNodeXForm<timm, [{ + unsigned Mods = SISrcMods::OP_SEL_1; + if (N->getZExtValue()) + Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); + return CurDAG->getTargetConstant(Mods, SDLoc(N), MVT::i32); +}]>; + +def VOP3PModsNegAbs : SDNodeXForm<timm, [{ + unsigned Val = N->getZExtValue(); + unsigned Mods = SISrcMods::OP_SEL_1; + if (Val == 1) + Mods ^= SISrcMods::NEG; + if (Val == 2) + Mods ^= SISrcMods::ABS; + if (Val == 3) + Mods ^= (SISrcMods::NEG | SISrcMods::ABS); + return CurDAG->getTargetConstant(Mods, SDLoc(N), MVT::i32); +}]>; + class bitextract_imm : SDNodeXFormgetZExtValue(); unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1; @@ -1647,9 +1673,6 @@ 
def VOP3OMods : ComplexPattern; def VOP3PMods : ComplexPattern; def VOP3PModsDOT : ComplexPattern; -def VOP3PModsNeg : ComplexPattern; -def VOP3PModsNegs : ComplexPattern; // chfang: not use complex pattern? -def VOP3PModsNegAbs : ComplexPattern; def WMMAOpSelVOP3PMods : ComplexPattern; def WMMAModsF32NegAbs : ComplexPattern; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index 95fcd4ac1c101..9cc68c91369a6 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -557,11 +557,11 @@ multiclass VOP3PDOTIUInst { null_frag, 1>; // Dot-iu instructions consider input as signed if imod neg bits are set. Thus // Dot-iu Intrinsics have extra operands and require separate codegen pattern. - def : GCNPat < (intrinsic_node (VOP3PModsNeg i32:$src0_mods), i32:$src0, - (VOP3PModsNeg i32:$src1_mods), i32:$src1, + def : GCNPat < (intrinsic_node timm:$src0_mods, i32:$src0, + timm:$src1_mods, i32:$src1, i32:$src2, (i1 timm:$clamp)), - (!cast(NAME) $src0_mods, i32:$src0, - $src1_mods, i32:$src1, + (!cast(NAME) (VOP3PModsNeg $src0_mods), i32:$src0, + (VOP3PModsNeg $src1_mods), i32:$src1, (i32 8), i32:$src2, i1:$clamp) >; } @@ -1302,11 +1302,11 @@ class WMMAOpSelPat : class WMMAUIClampPat : GCNPat < (P.DstVT (node - (VOP3PModsNeg i32:$src0_modifiers), (P.Src0VT P.Src0VT:$src0), - (VOP3PModsNeg i32:$src1_modifiers), (P.Src1VT P.Src1VT:$src1), + timm:$src0_modifiers, (P.Src0VT P.Src0VT:$src0), + timm:$src1_modifiers, (P.Src1VT P.Src1VT:$src1), (P.Src2VT P.Src2VT:$src2), (i1 timm:$clamp) )), - (P.DstVT (Inst i32:$src0_modifiers, P.Src0VT:$src0, i32:$src1_modifiers, P.Src1VT:$src1, (i32 8), P.Src2VT:$src2, i1:$clamp)) + (P.DstVT (Inst (VOP3PModsNeg $src0_modifiers), P.Src0VT:$src0, (VOP3PModsNeg $src1_modifiers), P.Src1VT:$src1, (i32 8), P.Src2VT:$src2, i1:$clamp)) >; class WMMAOpcodeMapping { @@ -1551,44 +1551,44 @@ class VOP3PWMMA_Profile ArgTy, bit _IsSWMMAC, int _IndexType, bit 
IsAB_F16_IMod0 = !and(IsAB_F16, !not(HasIModOp)); bit IsAB_F32F64_IMod1 = !and(!or(IsAB_F64, IsAB_F32), HasIModOp); bit IsAB_F16BF16_IMod1 = !and(!or(IsAB_F16, IsAB_BF16), HasIModOp); - dag Src0InPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0), - IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs i32:$src0_modifiers), Src0VT:$src0), + dag Src0InPat = !cond(IsAB_F32F64_IMod1 : (ins timm:$src0_modifiers, Src0VT:$src0), + IsAB_F16BF16_IMod1 : (ins timm:$src0_modifiers, Src0VT:$src0), IsAB_F16_IMod0 : (ins (Src0VT (WMMAModsF16Neg Src0VT:$src0, i32:$src0_modifiers))), IsAB_BF16_IMod0 : (ins Src0VT:$src0), - IsIU : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0), + IsIU : (ins timm:$src0_modifiers, Src0VT:$src0), HasMatrixFMT : (ins timm:$matrix_a_fmt, Src0VT:$src0), NoABMods : (ins Src0VT:$src0)); - dag Src0OutPat = !cond(IsAB_F32F64_IMod1 : (ins i32:$src0_modifiers, Src0VT:$src0), - IsAB_F16BF16_IMod1 : (ins i32:$src0_modifiers, Src0VT:$src0), + dag Src0OutPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg $src0_modifiers), Src0VT:$src0), + IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs $src0_modifiers), Src0VT:$src0), IsAB_F16_IMod0 : (ins i32:$src0_modifiers, Src0VT:$src0), IsAB_BF16_IMod0 : (ins (i32 8), Src0VT:$src0), - IsIU : (ins i32:$src0_modifiers, Src0VT:$src0), + IsIU : (ins (VOP3PModsNeg $src0_modifiers), Src0VT:$src0), NoABMods : (ins Src0VT:$src0)); - dag Src1InPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1), - IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs i32:$src1_modifiers), Src1VT:$src1), + dag Src1InPat = !cond(IsAB_F32F64_IMod1 : (ins timm:$src1_modifiers, Src1VT:$src1), + IsAB_F16BF16_IMod1 : (ins timm:$src1_modifiers, Src1VT:$src1), IsAB_F16_IMod0 : (ins (Src1VT (WMMAModsF16Neg Src1VT:$src1, i32:$src1_modifiers))), IsAB_BF16_IMod0 : (ins Src1VT:$src1), - IsIU : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1), + IsIU : (ins timm:$src1_modifiers, Src1VT:$src1), HasMatrixFMT : (ins 
timm:$matrix_b_fmt, Src1VT:$src1), NoABMods : (ins Src1VT:$src1)); - dag Src1OutPat = !cond(IsAB_F32F64_IMod1 : (ins i32:$src1_modifiers, Src1VT:$src1), - IsAB_F16BF16_IMod1 : (ins i32:$src1_modifiers, Src1VT:$src1), + dag Src1OutPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg $src1_modifiers), Src1VT:$src1), + IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs $src1_modifiers), Src1VT:$src1), IsAB_F16_IMod0 : (ins i32:$src1_modifiers, Src1VT:$src1), IsAB_BF16_IMod0 : (ins (i32 8), Src1VT:$src1), - IsIU : (ins i32:$src1_modifiers, Src1VT:$src1), + IsIU : (ins (VOP3PModsNeg $src1_modifiers), Src1VT:$src1), NoABMods : (ins Src1VT:$src1)); bit IsC_IMod1 = !and(HasIModOp, IsWMMA, !not(IsIU), !not(IsXF32)); bit IsC_F32_IMod0 = !and(IsC_F32, !not(HasIModOp)); bit IsC_F16_IMod0 = !and(IsC_F16, !not(HasIModOp)); bit IsC_BF16_IMod0 = !and(IsC_BF16, !not(HasIModOp)); bit IsIUXF32 = !or(IsIU, IsXF32); - dag Src2InPatWmma = !cond(IsC_IMod1 : (ins (VOP3PModsNegAbs i32:$src2_modifiers), Src2VT:$src2), + dag Src2InPatWmma = !cond(IsC_IMod1 : (ins timm:$src2_modifiers, Src2VT:$src2), IsC_F32_IMod0 : (ins (Src2VT (WMMAModsF32NegAbs Src2VT:$src2, i32:$src2_modifiers))), IsC_F16_IMod0 : (ins (Src2VT (WMMAModsF16NegAbs Src2VT:$src2, i32:$src2_modifiers))), IsC_BF16_IMod0 : (ins Src2VT:$src2), IsIUXF32 : (ins Src2VT:$src2), IsSWMMAC : (ins)); - dag Src2OutPatWmma = !cond(IsC_IMod1 : (ins i32:$src2_modifiers, Src2VT:$src2), + dag Src2OutPatWmma = !cond(IsC_IMod1 : (ins (VOP3PModsNegAbs $src2_modifiers), Src2VT:$src2), IsC_F32_IMod0 : (ins i32:$src2_modifiers, Src2VT:$src2), IsC_F16_IMod0 : (ins i32:$src2_modifiers, Src2VT:$src2), IsC_BF16_IMod0 : (ins (i32 8), Src2VT:$src2), @@ -1604,8 +1604,8 @@ class VOP3PWMMA_Profile ArgTy, bit _IsSWMMAC, int _IndexType, !eq(IndexType, 16): (ins i32:$src2, i32:$index_key_16bit), !eq(IndexType, 32): (ins i64:$src2, i32:$index_key_32bit)); dag MatrixFMTOutPat = !if(HasMatrixFMT, (ins i32:$matrix_a_fmt, i32:$matrix_b_fmt), (ins)); - dag Src2InlineInPat = 
!con(!if(IsC_IMod1, (ins (VOP3PModsNegAbs i32:$src2_modifiers)), (ins)), (ins (Src2VT (WMMAVISrc Src2VT:$src2)))); - dag Src2InlineOutPat = !con(!if(IsIUXF32, (ins), !if(IsC_IMod1, (ins i32:$src2_modifiers), (ins (i32 8)))), (ins Src2VT:$src2)); + dag Src2InlineInPat = !con(!if(IsC_IMod1, (ins timm:$src2_modifiers), (ins)), (ins (Src2VT (WMMAVISrc Src2VT:$src2)))); + dag Src2InlineOutPat = !con(!if(IsIUXF32, (ins), !if(IsC_IMod1, (ins (VOP3PModsNegAbs $src2_modifiers)), (ins (i32 8)))), (ins Src2VT:$src2)); dag MatrixReuseInPat = !if(HasMatrixReuse, (ins timm:$matrix_a_reuse, timm:$matrix_b_reuse), (ins)); dag MatrixReuseOutModPat = !if(HasMatrixReuse, (ins i1:$matrix_a_reuse, i1:$matrix_b_reuse), (ins)); From 25fe5a75df37d101d1b21722804e2e89045e66be Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Sun, 3 Aug 2025 23:09:30 -0700 Subject: [PATCH 2/5] [AMDGPU] Fix a wrong indentation --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 212f4872303de..50c65287c708d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -7046,8 +7046,8 @@ void AMDGPUInstructionSelector::renderVOP3PModsNeg(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const { unsigned Mods = SISrcMods::OP_SEL_1; - if (MI.getOperand(OpIdx).getImm()) - Mods ^= SISrcMods::NEG; + if (MI.getOperand(OpIdx).getImm()) + Mods ^= SISrcMods::NEG; MIB.addImm((int64_t)Mods); } From f9bc1f63344bd158ceeee068dbd337e3be6ae188 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Sun, 3 Aug 2025 23:59:05 -0700 Subject: [PATCH 3/5] [AMDGPU] Fix clang format --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 12 ++++++------ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git 
a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 50c65287c708d..30d0e0d2da10b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -7043,8 +7043,8 @@ void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB, } void AMDGPUInstructionSelector::renderVOP3PModsNeg(MachineInstrBuilder &MIB, - const MachineInstr &MI, - int OpIdx) const { + const MachineInstr &MI, + int OpIdx) const { unsigned Mods = SISrcMods::OP_SEL_1; if (MI.getOperand(OpIdx).getImm()) Mods ^= SISrcMods::NEG; @@ -7052,8 +7052,8 @@ void AMDGPUInstructionSelector::renderVOP3PModsNeg(MachineInstrBuilder &MIB, } void AMDGPUInstructionSelector::renderVOP3PModsNegs(MachineInstrBuilder &MIB, - const MachineInstr &MI, - int OpIdx) const { + const MachineInstr &MI, + int OpIdx) const { unsigned Mods = SISrcMods::OP_SEL_1; if (MI.getOperand(OpIdx).getImm()) Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); @@ -7061,8 +7061,8 @@ void AMDGPUInstructionSelector::renderVOP3PModsNegs(MachineInstrBuilder &MIB, } void AMDGPUInstructionSelector::renderVOP3PModsNegAbs(MachineInstrBuilder &MIB, - const MachineInstr &MI, - int OpIdx) const { + const MachineInstr &MI, + int OpIdx) const { unsigned Val = MI.getOperand(OpIdx).getImm(); unsigned Mods = SISrcMods::OP_SEL_1; if (Val == 1) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index d0b5dc5e11e39..c9da419846ee5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -413,11 +413,11 @@ class AMDGPUInstructionSelector final : public InstructionSelector { int OpIdx) const; void renderVOP3PModsNeg(MachineInstrBuilder &MIB, const MachineInstr &MI, - int OpIdx) const; + int OpIdx) const; void renderVOP3PModsNegs(MachineInstrBuilder &MIB, const MachineInstr &MI, - int OpIdx) const; + int 
OpIdx) const; void renderVOP3PModsNegAbs(MachineInstrBuilder &MIB, const MachineInstr &MI, - int OpIdx) const; + int OpIdx) const; void renderPrefetchLoc(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const; From 6206523ccb54f3aef556b4f040013e8f49fdcfff Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Mon, 4 Aug 2025 11:41:21 -0700 Subject: [PATCH 4/5] [AMDGPU] Add comments for the numbers in VOP3PModsNegAbs definition --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 8 ++++---- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 30d0e0d2da10b..512f85068da43 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -7064,12 +7064,12 @@ void AMDGPUInstructionSelector::renderVOP3PModsNegAbs(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const { unsigned Val = MI.getOperand(OpIdx).getImm(); - unsigned Mods = SISrcMods::OP_SEL_1; - if (Val == 1) + unsigned Mods = SISrcMods::OP_SEL_1; // default: none + if (Val == 1) // neg Mods ^= SISrcMods::NEG; - if (Val == 2) + if (Val == 2) // abs Mods ^= SISrcMods::ABS; - if (Val == 3) + if (Val == 3) // neg and abs Mods ^= (SISrcMods::NEG | SISrcMods::ABS); MIB.addImm((int64_t)Mods); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 4380ff1bf33d4..44366060cb9cf 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -924,12 +924,12 @@ def VOP3PModsNegs : SDNodeXForm<timm, [{ def VOP3PModsNegAbs : SDNodeXForm<timm, [{ unsigned Val = N->getZExtValue(); - unsigned Mods = SISrcMods::OP_SEL_1; - if (Val == 1) + unsigned Mods = SISrcMods::OP_SEL_1; // default: none + if (Val == 1) // neg Mods ^= SISrcMods::NEG; - if (Val == 2) + if (Val == 2) // abs Mods ^= SISrcMods::ABS; - if (Val == 3) + if (Val == 3) // neg and abs Mods ^= 
(SISrcMods::NEG | SISrcMods::ABS); return CurDAG->getTargetConstant(Mods, SDLoc(N), MVT::i32); }]>; From ac822578ba40cd4736588e6acf879eb432a09f60 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Mon, 4 Aug 2025 11:54:08 -0700 Subject: [PATCH 5/5] [AMDGPU] Remove a trailing white space --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 512f85068da43..b7fd131e76056 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -7064,7 +7064,7 @@ void AMDGPUInstructionSelector::renderVOP3PModsNegAbs(MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const { unsigned Val = MI.getOperand(OpIdx).getImm(); - unsigned Mods = SISrcMods::OP_SEL_1; // default: none + unsigned Mods = SISrcMods::OP_SEL_1; // default: none if (Val == 1) // neg Mods ^= SISrcMods::NEG; if (Val == 2) // abs