Skip to content

[AMDGPU] Use SDNodeXForm to select a few VOP3P modifiers, NFC #151907

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 7 additions & 12 deletions llvm/lib/Target/AMDGPU/AMDGPUGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,6 @@ def gi_vop3pmodsdot :
GIComplexOperandMatcher<s32, "selectVOP3PModsDOT">,
GIComplexPatternEquiv<VOP3PModsDOT>;

// GlobalISel complex-operand matcher tied to the VOP3PModsNeg complex pattern;
// matching is delegated to the C++ hook named "selectVOP3PModsNeg".
def gi_vop3pmodsneg :
GIComplexOperandMatcher<s32, "selectVOP3PModsNeg">,
GIComplexPatternEquiv<VOP3PModsNeg>;

// GlobalISel complex-operand matcher tied to the VOP3PModsNegs complex pattern;
// matching is delegated to the C++ hook named "selectVOP3PModsNegs".
def gi_vop3pmodsnegs :
GIComplexOperandMatcher<s32, "selectVOP3PModsNegs">,
GIComplexPatternEquiv<VOP3PModsNegs>;

// GlobalISel complex-operand matcher tied to the VOP3PModsNegAbs complex
// pattern; matching is delegated to the C++ hook named "selectVOP3PModsNegAbs".
def gi_dotiuvop3pmodsnegabs :
GIComplexOperandMatcher<s32, "selectVOP3PModsNegAbs">,
GIComplexPatternEquiv<VOP3PModsNegAbs>;

def gi_wmmaopselvop3pmods :
GIComplexOperandMatcher<s32, "selectWMMAOpSelVOP3PMods">,
GIComplexPatternEquiv<WMMAOpSelVOP3PMods>;
Expand Down Expand Up @@ -452,6 +440,13 @@ def gi_fp_pow2_to_exponent : GICustomOperandRenderer<"renderFPPow2ToExponent">,
def gi_as_hw_round_mode : GICustomOperandRenderer<"renderRoundMode">,
GISDNodeXFormEquiv<as_hw_round_mode>;

// GlobalISel counterparts of the VOP3PModsNeg / VOP3PModsNegs / VOP3PModsNegAbs
// SDNodeXForms. Each one names the C++ custom renderer that emits the
// transformed operand (renderVOP3PModsNeg, renderVOP3PModsNegs,
// renderVOP3PModsNegAbs respectively).
def gi_VOP3PModsNeg : GICustomOperandRenderer<"renderVOP3PModsNeg">,
GISDNodeXFormEquiv<VOP3PModsNeg>;
def gi_VOP3PModsNegs : GICustomOperandRenderer<"renderVOP3PModsNegs">,
GISDNodeXFormEquiv<VOP3PModsNegs>;
def gi_VOP3PModsNegAbs : GICustomOperandRenderer<"renderVOP3PModsNegAbs">,
GISDNodeXFormEquiv<VOP3PModsNegAbs>;

def gi_prefetch_loc : GICustomOperandRenderer<"renderPrefetchLoc">,
GISDNodeXFormEquiv<PrefetchLoc>;

Expand Down
57 changes: 0 additions & 57 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3449,63 +3449,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}

// Derive the neg_lo source modifier from an i1 immediate operand.
//
// \param In  Constant i1 operand taken from the intrinsic call.
// \param Src Receives an i32 target constant holding the modifier bits:
//            OP_SEL_1, with NEG toggled when the immediate is 1.
// \returns true unconditionally.
bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
  // The intrinsic passes a literal i1 that describes the source modifiers of
  // the operand that follows it: 1 promotes packed values to signed, 0 treats
  // them as unsigned.
  const ConstantSDNode *SignImm = cast<ConstantSDNode>(In);
  assert(SignImm->getAPIntValue().getBitWidth() == 1 && "expected i1 value");

  const bool IsSigned = SignImm->getZExtValue() == 1;

  unsigned SrcMods = SISrcMods::OP_SEL_1;
  if (IsSigned)
    SrcMods ^= SISrcMods::NEG;

  Src = CurDAG->getTargetConstant(SrcMods, SDLoc(In), MVT::i32);
  return true;
}

// Derive both neg_lo and neg_hi from an i1 immediate operand. This variant is
// meant for F16/BF16 operands of WMMA instructions, where neg_lo covers the
// matrix's even k elements and neg_hi covers its odd k elements.
//
// \param In  Constant i1 operand taken from the intrinsic call.
// \param Src Receives an i32 target constant holding the modifier bits:
//            OP_SEL_1, with NEG and NEG_HI toggled when the immediate is 1.
// \returns true unconditionally.
bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegs(SDValue In, SDValue &Src) const {
  // The intrinsic passes a literal i1 that describes the source modifiers of
  // the operand that follows it: 1 promotes packed values to signed, 0 treats
  // them as unsigned.
  const ConstantSDNode *SignImm = cast<ConstantSDNode>(In);
  assert(SignImm->getAPIntValue().getBitWidth() == 1 && "expected i1 value");

  const bool IsSigned = SignImm->getZExtValue() == 1;

  unsigned SrcMods = SISrcMods::OP_SEL_1;
  if (IsSigned)
    SrcMods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);

  Src = CurDAG->getTargetConstant(SrcMods, SDLoc(In), MVT::i32);
  return true;
}

// Select neg, abs, or both neg and abs from the i16 immediate operand.
//
// \param In  Constant operand whose value encodes the requested modifier:
//            1 = neg, 2 = abs, 3 = neg and abs.
// \param Src Receives an i32 target constant holding OP_SEL_1 with the
//            requested bits toggled.
// \returns true unconditionally.
bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegAbs(SDValue In, SDValue &Src) const {
  const ConstantSDNode *ModImm = cast<ConstantSDNode>(In);
  const unsigned Selector = ModImm->getZExtValue();

  unsigned SrcMods = SISrcMods::OP_SEL_1;
  if (Selector == 1)
    SrcMods ^= SISrcMods::NEG;
  else if (Selector == 2)
    SrcMods ^= SISrcMods::ABS;
  else if (Selector == 3)
    SrcMods ^= (SISrcMods::NEG | SISrcMods::ABS);
  // Any other value is silently ignored (treated the same as 0).

  Src = CurDAG->getTargetConstant(SrcMods, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast<ConstantSDNode>(In);
Expand Down
3 changes: 0 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool IsDOT = false) const;
bool SelectVOP3PModsDOT(SDValue In, SDValue &Src, SDValue &SrcMods) const;

// Selectors that turn a constant modifier operand (In) into an i32 target
// constant of source-modifier bits written to Src.
bool SelectVOP3PModsNeg(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsNegs(SDValue In, SDValue &Src) const;
bool SelectVOP3PModsNegAbs(SDValue In, SDValue &Src) const;
bool SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const;

bool SelectWMMAModsF32NegAbs(SDValue In, SDValue &Src,
Expand Down
92 changes: 32 additions & 60 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4988,66 +4988,6 @@ AMDGPUInstructionSelector::selectVOP3PModsDOT(MachineOperand &Root) const {
return selectVOP3PRetHelper(Root, true);
}

// Derive the neg_lo source modifier from an i1 immediate operand.
//
// \param Root Immediate operand; must hold 0 or -1.
// \returns a single renderer that appends the src_mods immediate: OP_SEL_1,
//          with NEG toggled when Root is -1.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PModsNeg(MachineOperand &Root) const {
  // The intrinsic passes a literal i1 that describes the source modifiers of
  // the operand that follows it. It arrives in the Imm operand as an i1
  // sign-extended to int64_t, so "1" shows up as -1: 1(-1) promotes packed
  // values to signed, 0 treats them as unsigned.
  assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
         "expected i1 value");

  const bool IsSigned = Root.getImm() == -1;

  unsigned SrcMods = SISrcMods::OP_SEL_1;
  if (IsSigned)
    SrcMods ^= SISrcMods::NEG;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addImm(SrcMods); } // src_mods
  }};
}

// Derive both neg_lo and neg_hi from an i1 immediate operand. This variant is
// meant for F16/BF16 operands of WMMA instructions, where neg_lo covers the
// matrix's even k elements and neg_hi covers its odd k elements.
//
// \param Root Immediate operand; must hold 0 or -1.
// \returns a single renderer that appends the src_mods immediate: OP_SEL_1,
//          with NEG and NEG_HI toggled when Root is -1.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PModsNegs(MachineOperand &Root) const {
  // The intrinsic passes a literal i1 that describes the source modifiers of
  // the operand that follows it. It arrives in the Imm operand as an i1
  // sign-extended to int64_t, so "1" shows up as -1: 1(-1) promotes packed
  // values to signed, 0 treats them as unsigned.
  assert((Root.isImm() && (Root.getImm() == -1 || Root.getImm() == 0)) &&
         "expected i1 value");

  const bool IsSigned = Root.getImm() == -1;

  unsigned SrcMods = SISrcMods::OP_SEL_1;
  if (IsSigned)
    SrcMods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addImm(SrcMods); } // src_mods
  }};
}

// Select neg, abs, or both neg and abs from the i16 immediate operand.
//
// \param Root Immediate operand whose value encodes the requested modifier:
//             1 = neg, 2 = abs, 3 = neg and abs.
// \returns a single renderer that appends the src_mods immediate: OP_SEL_1
//          with the requested bits toggled.
InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PModsNegAbs(MachineOperand &Root) const {
  assert(Root.isImm() && "Modifier for C must be an immediate");

  const int64_t Selector = Root.getImm();

  unsigned SrcMods = SISrcMods::OP_SEL_1;
  if (Selector == 1)
    SrcMods ^= SISrcMods::NEG;
  else if (Selector == 2)
    SrcMods ^= SISrcMods::ABS;
  else if (Selector == 3)
    SrcMods ^= (SISrcMods::NEG | SISrcMods::ABS);
  // Any other value is silently ignored (treated the same as 0).

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.addImm(SrcMods); } // src_mods
  }};
}

InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
MachineOperand &Root) const {
Expand Down Expand Up @@ -7102,6 +7042,38 @@ void AMDGPUInstructionSelector::renderRoundMode(MachineInstrBuilder &MIB,
MIB.addImm((MI.getOperand(OpIdx).getImm() + 3) % 4);
}

// Append a src_mods immediate derived from operand OpIdx of MI: OP_SEL_1, with
// NEG toggled when that operand's immediate is non-zero.
void AMDGPUInstructionSelector::renderVOP3PModsNeg(MachineInstrBuilder &MIB,
                                                   const MachineInstr &MI,
                                                   int OpIdx) const {
  const bool Negate = MI.getOperand(OpIdx).getImm() != 0;

  unsigned SrcMods = SISrcMods::OP_SEL_1;
  if (Negate)
    SrcMods ^= SISrcMods::NEG;

  MIB.addImm(static_cast<int64_t>(SrcMods));
}

// Append a src_mods immediate derived from operand OpIdx of MI: OP_SEL_1, with
// both NEG and NEG_HI toggled when that operand's immediate is non-zero.
void AMDGPUInstructionSelector::renderVOP3PModsNegs(MachineInstrBuilder &MIB,
                                                    const MachineInstr &MI,
                                                    int OpIdx) const {
  const bool Negate = MI.getOperand(OpIdx).getImm() != 0;

  unsigned SrcMods = SISrcMods::OP_SEL_1;
  if (Negate)
    SrcMods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);

  MIB.addImm(static_cast<int64_t>(SrcMods));
}

// Append a src_mods immediate derived from operand OpIdx of MI. The operand's
// immediate selects which bits are toggled on top of OP_SEL_1:
//   1 -> NEG, 2 -> ABS, 3 -> NEG and ABS, anything else -> nothing.
void AMDGPUInstructionSelector::renderVOP3PModsNegAbs(MachineInstrBuilder &MIB,
                                                      const MachineInstr &MI,
                                                      int OpIdx) const {
  const unsigned Selector = MI.getOperand(OpIdx).getImm();

  unsigned SrcMods = SISrcMods::OP_SEL_1; // default: none
  switch (Selector) {
  case 1: // neg
    SrcMods ^= SISrcMods::NEG;
    break;
  case 2: // abs
    SrcMods ^= SISrcMods::ABS;
    break;
  case 3: // neg and abs
    SrcMods ^= (SISrcMods::NEG | SISrcMods::ABS);
    break;
  default:
    break;
  }

  MIB.addImm(static_cast<int64_t>(SrcMods));
}

void AMDGPUInstructionSelector::renderPrefetchLoc(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,13 +199,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
InstructionSelector::ComplexRendererFns
selectVOP3PModsDOT(MachineOperand &Root) const;

// Complex-operand selectors that take an immediate modifier operand (Root) and
// return a renderer adding the corresponding src_mods immediate.
InstructionSelector::ComplexRendererFns
selectVOP3PModsNeg(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVOP3PModsNegs(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
selectVOP3PModsNegAbs(MachineOperand &Root) const;

InstructionSelector::ComplexRendererFns
selectWMMAOpSelVOP3PMods(MachineOperand &Root) const;

Expand Down Expand Up @@ -419,6 +412,13 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
void renderRoundMode(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;

// Custom operand renderers: each reads the immediate in operand OpIdx of MI and
// appends a src_mods immediate built from it to MIB.
void renderVOP3PModsNeg(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
void renderVOP3PModsNegs(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
void renderVOP3PModsNegAbs(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;

void renderPrefetchLoc(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;

Expand Down
29 changes: 26 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,32 @@ def SupportedRoundMode : TImmLeaf<i32, [{
Imm == (int)RoundingMode::TowardNegative;
}]>;

// Transform a target immediate into an i32 src_mods target constant:
// OP_SEL_1, with NEG toggled when the immediate is non-zero.
def VOP3PModsNeg : SDNodeXForm<timm, [{
  const bool Negate = N->getZExtValue() != 0;
  unsigned SrcMods = SISrcMods::OP_SEL_1;
  if (Negate)
    SrcMods ^= SISrcMods::NEG;
  return CurDAG->getTargetConstant(SrcMods, SDLoc(N), MVT::i32);
}]>;

// Transform a target immediate into an i32 src_mods target constant:
// OP_SEL_1, with both NEG and NEG_HI toggled when the immediate is non-zero.
def VOP3PModsNegs : SDNodeXForm<timm, [{
  const bool Negate = N->getZExtValue() != 0;
  unsigned SrcMods = SISrcMods::OP_SEL_1;
  if (Negate)
    SrcMods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
  return CurDAG->getTargetConstant(SrcMods, SDLoc(N), MVT::i32);
}]>;

// Transform a target immediate into an i32 src_mods target constant. The
// immediate selects which bits are toggled on top of OP_SEL_1:
//   1 -> NEG, 2 -> ABS, 3 -> NEG and ABS, anything else -> nothing.
def VOP3PModsNegAbs : SDNodeXForm<timm, [{
  const unsigned Selector = N->getZExtValue();
  unsigned SrcMods = SISrcMods::OP_SEL_1; // default: none
  switch (Selector) {
  case 1: // neg
    SrcMods ^= SISrcMods::NEG;
    break;
  case 2: // abs
    SrcMods ^= SISrcMods::ABS;
    break;
  case 3: // neg and abs
    SrcMods ^= (SISrcMods::NEG | SISrcMods::ABS);
    break;
  default:
    break;
  }
  return CurDAG->getTargetConstant(SrcMods, SDLoc(N), MVT::i32);
}]>;

class bitextract_imm<int bitnum> : SDNodeXForm<imm, [{
uint64_t Imm = N->getZExtValue();
unsigned Bit = (Imm >> }] # bitnum # [{ ) & 1;
Expand Down Expand Up @@ -1653,9 +1679,6 @@ def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
def VOP3PMods : ComplexPattern<untyped, 2, "SelectVOP3PMods">;

def VOP3PModsDOT : ComplexPattern<untyped, 2, "SelectVOP3PModsDOT">;
// Single-result complex patterns, each naming the C++ selector that matches it.
def VOP3PModsNeg : ComplexPattern<untyped, 1, "SelectVOP3PModsNeg">;
def VOP3PModsNegs : ComplexPattern<untyped, 1, "SelectVOP3PModsNegs">; // chfang: open question - could this avoid using a complex pattern?
def VOP3PModsNegAbs : ComplexPattern<untyped, 1, "SelectVOP3PModsNegAbs">;
def WMMAOpSelVOP3PMods : ComplexPattern<untyped, 1, "SelectWMMAOpSelVOP3PMods">;

def WMMAModsF32NegAbs : ComplexPattern<untyped, 2, "SelectWMMAModsF32NegAbs">;
Expand Down
46 changes: 23 additions & 23 deletions llvm/lib/Target/AMDGPU/VOP3PInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -557,11 +557,11 @@ multiclass VOP3PDOTIUInst <string OpName, SDPatternOperator intrinsic_node> {
null_frag, 1>;
// Dot-iu instructions consider input as signed if imod neg bits are set. Thus
// Dot-iu Intrinsics have extra operands and require separate codegen pattern.
def : GCNPat < (intrinsic_node (VOP3PModsNeg i32:$src0_mods), i32:$src0,
(VOP3PModsNeg i32:$src1_mods), i32:$src1,
def : GCNPat < (intrinsic_node timm:$src0_mods, i32:$src0,
timm:$src1_mods, i32:$src1,
i32:$src2, (i1 timm:$clamp)),
(!cast<Instruction>(NAME) $src0_mods, i32:$src0,
$src1_mods, i32:$src1,
(!cast<Instruction>(NAME) (VOP3PModsNeg $src0_mods), i32:$src0,
(VOP3PModsNeg $src1_mods), i32:$src1,
(i32 8), i32:$src2, i1:$clamp)
>;
}
Expand Down Expand Up @@ -1302,11 +1302,11 @@ class WMMAOpSelPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :

class WMMAUIClampPat<Instruction Inst, SDPatternOperator node, VOPProfile P> :
GCNPat < (P.DstVT (node
(VOP3PModsNeg i32:$src0_modifiers), (P.Src0VT P.Src0VT:$src0),
(VOP3PModsNeg i32:$src1_modifiers), (P.Src1VT P.Src1VT:$src1),
timm:$src0_modifiers, (P.Src0VT P.Src0VT:$src0),
timm:$src1_modifiers, (P.Src1VT P.Src1VT:$src1),
(P.Src2VT P.Src2VT:$src2), (i1 timm:$clamp)
)),
(P.DstVT (Inst i32:$src0_modifiers, P.Src0VT:$src0, i32:$src1_modifiers, P.Src1VT:$src1, (i32 8), P.Src2VT:$src2, i1:$clamp))
(P.DstVT (Inst (VOP3PModsNeg $src0_modifiers), P.Src0VT:$src0, (VOP3PModsNeg $src1_modifiers), P.Src1VT:$src1, (i32 8), P.Src2VT:$src2, i1:$clamp))
>;

class WMMAOpcodeMapping<Instruction TwoAddr, Instruction ThreeAddr> {
Expand Down Expand Up @@ -1563,44 +1563,44 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
bit IsAB_F16_IMod0 = !and(IsAB_F16, !not(HasIModOp));
bit IsAB_F32F64_IMod1 = !and(!or(IsAB_F64, IsAB_F32), HasIModOp);
bit IsAB_F16BF16_IMod1 = !and(!or(IsAB_F16, IsAB_BF16), HasIModOp);
dag Src0InPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs i32:$src0_modifiers), Src0VT:$src0),
dag Src0InPat = !cond(IsAB_F32F64_IMod1 : (ins timm:$src0_modifiers, Src0VT:$src0),
IsAB_F16BF16_IMod1 : (ins timm:$src0_modifiers, Src0VT:$src0),
IsAB_F16_IMod0 : (ins (Src0VT (WMMAModsF16Neg Src0VT:$src0, i32:$src0_modifiers))),
IsAB_BF16_IMod0 : (ins Src0VT:$src0),
IsIU : (ins (VOP3PModsNeg i32:$src0_modifiers), Src0VT:$src0),
IsIU : (ins timm:$src0_modifiers, Src0VT:$src0),
HasMatrixFMT : (ins timm:$matrix_a_fmt, Src0VT:$src0),
NoABMods : (ins Src0VT:$src0));
dag Src0OutPat = !cond(IsAB_F32F64_IMod1 : (ins i32:$src0_modifiers, Src0VT:$src0),
IsAB_F16BF16_IMod1 : (ins i32:$src0_modifiers, Src0VT:$src0),
dag Src0OutPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg $src0_modifiers), Src0VT:$src0),
IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs $src0_modifiers), Src0VT:$src0),
IsAB_F16_IMod0 : (ins i32:$src0_modifiers, Src0VT:$src0),
IsAB_BF16_IMod0 : (ins (i32 8), Src0VT:$src0),
IsIU : (ins i32:$src0_modifiers, Src0VT:$src0),
IsIU : (ins (VOP3PModsNeg $src0_modifiers), Src0VT:$src0),
NoABMods : (ins Src0VT:$src0));
dag Src1InPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs i32:$src1_modifiers), Src1VT:$src1),
dag Src1InPat = !cond(IsAB_F32F64_IMod1 : (ins timm:$src1_modifiers, Src1VT:$src1),
IsAB_F16BF16_IMod1 : (ins timm:$src1_modifiers, Src1VT:$src1),
IsAB_F16_IMod0 : (ins (Src1VT (WMMAModsF16Neg Src1VT:$src1, i32:$src1_modifiers))),
IsAB_BF16_IMod0 : (ins Src1VT:$src1),
IsIU : (ins (VOP3PModsNeg i32:$src1_modifiers), Src1VT:$src1),
IsIU : (ins timm:$src1_modifiers, Src1VT:$src1),
HasMatrixFMT : (ins timm:$matrix_b_fmt, Src1VT:$src1),
NoABMods : (ins Src1VT:$src1));
dag Src1OutPat = !cond(IsAB_F32F64_IMod1 : (ins i32:$src1_modifiers, Src1VT:$src1),
IsAB_F16BF16_IMod1 : (ins i32:$src1_modifiers, Src1VT:$src1),
dag Src1OutPat = !cond(IsAB_F32F64_IMod1 : (ins (VOP3PModsNeg $src1_modifiers), Src1VT:$src1),
IsAB_F16BF16_IMod1 : (ins (VOP3PModsNegs $src1_modifiers), Src1VT:$src1),
IsAB_F16_IMod0 : (ins i32:$src1_modifiers, Src1VT:$src1),
IsAB_BF16_IMod0 : (ins (i32 8), Src1VT:$src1),
IsIU : (ins i32:$src1_modifiers, Src1VT:$src1),
IsIU : (ins (VOP3PModsNeg $src1_modifiers), Src1VT:$src1),
NoABMods : (ins Src1VT:$src1));
bit IsC_IMod1 = !and(HasIModOp, IsWMMA, !not(IsIU), !not(IsXF32));
bit IsC_F32_IMod0 = !and(IsC_F32, !not(HasIModOp));
bit IsC_F16_IMod0 = !and(IsC_F16, !not(HasIModOp));
bit IsC_BF16_IMod0 = !and(IsC_BF16, !not(HasIModOp));
bit IsIUXF32 = !or(IsIU, IsXF32);
dag Src2InPatWmma = !cond(IsC_IMod1 : (ins (VOP3PModsNegAbs i32:$src2_modifiers), Src2VT:$src2),
dag Src2InPatWmma = !cond(IsC_IMod1 : (ins timm:$src2_modifiers, Src2VT:$src2),
IsC_F32_IMod0 : (ins (Src2VT (WMMAModsF32NegAbs Src2VT:$src2, i32:$src2_modifiers))),
IsC_F16_IMod0 : (ins (Src2VT (WMMAModsF16NegAbs Src2VT:$src2, i32:$src2_modifiers))),
IsC_BF16_IMod0 : (ins Src2VT:$src2),
IsIUXF32 : (ins Src2VT:$src2),
IsSWMMAC : (ins));
dag Src2OutPatWmma = !cond(IsC_IMod1 : (ins i32:$src2_modifiers, Src2VT:$src2),
dag Src2OutPatWmma = !cond(IsC_IMod1 : (ins (VOP3PModsNegAbs $src2_modifiers), Src2VT:$src2),
IsC_F32_IMod0 : (ins i32:$src2_modifiers, Src2VT:$src2),
IsC_F16_IMod0 : (ins i32:$src2_modifiers, Src2VT:$src2),
IsC_BF16_IMod0 : (ins (i32 8), Src2VT:$src2),
Expand All @@ -1616,8 +1616,8 @@ class VOP3PWMMA_Profile<list<ValueType> ArgTy, bit _IsSWMMAC, int _IndexType,
!eq(IndexType, 16): (ins i32:$src2, i32:$index_key_16bit),
!eq(IndexType, 32): (ins i64:$src2, i32:$index_key_32bit));
dag MatrixFMTOutPat = !if(HasMatrixFMT, (ins i32:$matrix_a_fmt, i32:$matrix_b_fmt), (ins));
dag Src2InlineInPat = !con(!if(IsC_IMod1, (ins (VOP3PModsNegAbs i32:$src2_modifiers)), (ins)), (ins (Src2VT (WMMAVISrc Src2VT:$src2))));
dag Src2InlineOutPat = !con(!if(IsIUXF32, (ins), !if(IsC_IMod1, (ins i32:$src2_modifiers), (ins (i32 8)))), (ins Src2VT:$src2));
dag Src2InlineInPat = !con(!if(IsC_IMod1, (ins timm:$src2_modifiers), (ins)), (ins (Src2VT (WMMAVISrc Src2VT:$src2))));
dag Src2InlineOutPat = !con(!if(IsIUXF32, (ins), !if(IsC_IMod1, (ins (VOP3PModsNegAbs $src2_modifiers)), (ins (i32 8)))), (ins Src2VT:$src2));
dag MatrixScaleInPat = !if(HasMatrixScale, (ins timm:$matrix_a_scale, timm:$matrix_a_scale_fmt, ScaleTy:$scale_src0,
timm:$matrix_b_scale, timm:$matrix_b_scale_fmt, ScaleTy:$scale_src1),
(ins));
Expand Down
Loading