Skip to content

Commit a24fae3

Browse files
authored
[AMDGPU][True16][GlobalISel] Fix v2*16 build_vector patterns (#151496)
- A pattern with an untyped IMPLICIT_DEF failed to generate an entry in the MatchTable, and it did so silently, without reporting an error. This is fixed by casting IMPLICIT_DEF to the appropriate type. This also fixes selecting "build_vector s16, undef" for GlobalISel with True16.
- Add a pattern for "build_vector undef, s16" that works for GlobalISel. With True16, GlobalISel produces a G_TRUNC on the operand, which the pattern has to match.
- Use REG_SEQUENCE for the real True16 (UseRealTrue16Insts) patterns instead of V_LSHLREV_B32_e64 to generate more optimal code.
1 parent 8cc4c6d commit a24fae3

File tree

10 files changed

+675
-176
lines changed

10 files changed

+675
-176
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3543,14 +3543,21 @@ def : GCNPat <
35433543
(vecTy (UniformBinFrag<build_vector> (Ty undef), (Ty SReg_32:$src1))),
35443544
(S_LSHL_B32 SReg_32:$src1, (i32 16))
35453545
>;
3546-
}
35473546

35483547
def : GCNPat <
35493548
(vecTy (DivergentBinFrag<build_vector> (Ty undef), (Ty VGPR_32:$src1))),
35503549
(vecTy (V_LSHLREV_B32_e64 (i32 16), VGPR_32:$src1))
35513550
>;
3551+
} // End True16Predicate = ...
35523552
} // End foreach Ty = ...
3553-
}
3553+
} // End AddedComplexity = 1
3554+
3555+
let True16Predicate = UseRealTrue16Insts in
3556+
def : GCNPat <
3557+
(v2i16 (DivergentBinFrag<build_vector> (i16 undef), (i16 (trunc i32:$src1)))),
3558+
(REG_SEQUENCE VGPR_32, (i16 (IMPLICIT_DEF)), lo16,
3559+
(i16 (EXTRACT_SUBREG VGPR_32:$src1, lo16)), hi16)
3560+
>;
35543561

35553562
let SubtargetPredicate = HasVOP3PInsts in {
35563563
foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
@@ -3599,7 +3606,11 @@ def : GCNPat <
35993606
>;
36003607
def : GCNPat <
36013608
(vecTy (DivergentBinFrag<build_vector> (Ty VGPR_16:$src0), (Ty undef))),
3602-
(REG_SEQUENCE VGPR_32, $src0, lo16, (IMPLICIT_DEF), hi16)
3609+
(REG_SEQUENCE VGPR_32, $src0, lo16, (Ty (IMPLICIT_DEF)), hi16)
3610+
>;
3611+
def : GCNPat <
3612+
(vecTy (DivergentBinFrag<build_vector> (Ty undef), (Ty VGPR_16:$src1))),
3613+
(REG_SEQUENCE VGPR_32, (Ty (IMPLICIT_DEF)), lo16, (Ty VGPR_16:$src1), hi16)
36033614
>;
36043615
}
36053616

0 commit comments

Comments
 (0)