Skip to content

Commit 8dd88b7

Browse files
[NVVM][NVPTX] Add support for tcgen05.mma
This commit adds support for tcgen05.mma instructions in NVPTX which tests under CodeGen/NVPTX/tcgen05-mma*. This tcgen05.mma instructions are modeled as intrinsics with multiple arguments to model cta_group, mma kind, collector usage etc. The rationale for the design is present documented in NVPTXUsage.rst file
1 parent f1eb869 commit 8dd88b7

13 files changed

+4729
-10
lines changed

llvm/docs/NVPTXUsage.rst

Lines changed: 385 additions & 3 deletions
Large diffs are not rendered by default.

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 429 additions & 1 deletion
Large diffs are not rendered by default.

llvm/include/llvm/IR/NVVMIntrinsicUtils.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,15 @@ enum class CTAGroupKind : uint8_t {
4747
CG_2 = 2, // cta_group::2 modifier
4848
};
4949

50+
enum class Tcgen05MMAKind : uint8_t { F16 = 0, TF32 = 1, F8F6F4 = 2, I8 = 3 };
51+
52+
enum class Tcgen05CollectorUsageOp : uint8_t {
53+
DISCARD = 0,
54+
LASTUSE = 1,
55+
FILL = 2,
56+
USE = 3,
57+
};
58+
5059
inline bool FPToIntegerIntrinsicShouldFTZ(Intrinsic::ID IntrinsicID) {
5160
switch (IntrinsicID) {
5261
case Intrinsic::nvvm_f2i_rm_ftz:

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 313 additions & 0 deletions
Large diffs are not rendered by default.

llvm/lib/Target/NVPTX/NVPTXISelLowering.h

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,44 @@ enum NodeType : unsigned {
8484
StoreV2,
8585
StoreV4,
8686
StoreV8,
87-
LAST_MEMORY_OPCODE = StoreV8,
87+
TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG1,
88+
TCGEN05_MMA_SHARED_DISABLE_OUTPUT_LANE_CG2,
89+
TCGEN05_MMA_SHARED_F16_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
90+
TCGEN05_MMA_SHARED_F16_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
91+
TCGEN05_MMA_SHARED_TF32_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
92+
TCGEN05_MMA_SHARED_TF32_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
93+
TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1,
94+
TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2,
95+
TCGEN05_MMA_TENSOR_F16_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
96+
TCGEN05_MMA_TENSOR_F16_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
97+
TCGEN05_MMA_TENSOR_TF32_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
98+
TCGEN05_MMA_TENSOR_TF32_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
99+
TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
100+
TCGEN05_MMA_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
101+
TCGEN05_MMA_TENSOR_F16_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
102+
TCGEN05_MMA_TENSOR_F16_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
103+
TCGEN05_MMA_TENSOR_TF32_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
104+
TCGEN05_MMA_TENSOR_TF32_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
105+
TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG1,
106+
TCGEN05_MMA_SP_SHARED_DISABLE_OUTPUT_LANE_CG2,
107+
TCGEN05_MMA_SP_SHARED_F16_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
108+
TCGEN05_MMA_SP_SHARED_F16_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
109+
TCGEN05_MMA_SP_SHARED_TF32_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
110+
TCGEN05_MMA_SP_SHARED_TF32_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
111+
TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1,
112+
TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2,
113+
TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
114+
TCGEN05_MMA_SP_TENSOR_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
115+
TCGEN05_MMA_SP_TENSOR_F16_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
116+
TCGEN05_MMA_SP_TENSOR_F16_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
117+
TCGEN05_MMA_SP_TENSOR_TF32_SCALE_D_DISABLE_OUTPUT_LANE_CG1,
118+
TCGEN05_MMA_SP_TENSOR_TF32_SCALE_D_DISABLE_OUTPUT_LANE_CG2,
119+
TCGEN05_MMA_SP_TENSOR_F16_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
120+
TCGEN05_MMA_SP_TENSOR_F16_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
121+
TCGEN05_MMA_SP_TENSOR_TF32_SCALE_D_DISABLE_OUTPUT_LANE_CG1_ASHIFT,
122+
TCGEN05_MMA_SP_TENSOR_TF32_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
123+
LAST_MEMORY_OPCODE =
124+
TCGEN05_MMA_SP_TENSOR_TF32_SCALE_D_DISABLE_OUTPUT_LANE_CG2_ASHIFT,
88125
};
89126
}
90127

0 commit comments

Comments
 (0)