Skip to content

Commit ba0936c

Browse files
committed
[AArch64] Fix post-inc stores of floating-point conversions
The commit at #147707 introduced a bug: patterns were missing for post-inc stores whose input is a vector_extract with i64 types. Additionally, remove the pre-legalization early exit, since it can cause the combine to miss its opportunity to apply the optimization.
1 parent a194d51 commit ba0936c

File tree

3 files changed

+132
-3
lines changed

3 files changed

+132
-3
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24135,9 +24135,6 @@ static SDValue combineStoreValueFPToInt(StoreSDNode *ST,
2413524135
TargetLowering::DAGCombinerInfo &DCI,
2413624136
SelectionDAG &DAG,
2413724137
const AArch64Subtarget *Subtarget) {
24138-
// Limit to post-legalization in order to avoid peeling truncating stores.
24139-
if (DCI.isBeforeLegalize())
24140-
return SDValue();
2414124138
if (!Subtarget->isNeonAvailable())
2414224139
return SDValue();
2414324140
// Source operand is already a vector.

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9273,8 +9273,12 @@ multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
92739273

92749274
defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
92759275
1>;
9276+
defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i64, ST1i8_POST,
9277+
1>;
92769278
defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
92779279
2>;
9280+
defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i64, ST1i16_POST,
9281+
2>;
92789282
defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
92799283
defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
92809284
defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;

llvm/test/CodeGen/AArch64/store-float-conversion.ll

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,20 @@ entry:
2727
ret void
2828
}
2929

30+
define ptr @f32_to_s8_inc(float %f, ptr %dst) {
31+
; CHECK-LABEL: f32_to_s8_inc:
32+
; CHECK: // %bb.0: // %entry
33+
; CHECK-NEXT: fcvtzs s0, s0
34+
; CHECK-NEXT: st1 { v0.b }[0], [x0], #1
35+
; CHECK-NEXT: ret
36+
entry:
37+
%conv = fptosi float %f to i32
38+
%trunc = trunc i32 %conv to i8
39+
%next = getelementptr i8, ptr %dst, i64 1
40+
store i8 %trunc, ptr %dst
41+
ret ptr %next
42+
}
43+
3044
define void @f32_to_u16(float %f, ptr %dst) {
3145
; CHECK-LABEL: f32_to_u16:
3246
; CHECK: // %bb.0: // %entry
@@ -53,6 +67,20 @@ entry:
5367
ret void
5468
}
5569

70+
define ptr @f32_to_s16_inc(float %f, ptr %dst) {
71+
; CHECK-LABEL: f32_to_s16_inc:
72+
; CHECK: // %bb.0: // %entry
73+
; CHECK-NEXT: fcvtzs s0, s0
74+
; CHECK-NEXT: st1 { v0.h }[0], [x0], #2
75+
; CHECK-NEXT: ret
76+
entry:
77+
%conv = fptosi float %f to i32
78+
%trunc = trunc i32 %conv to i16
79+
%next = getelementptr i16, ptr %dst, i64 1
80+
store i16 %trunc, ptr %dst
81+
ret ptr %next
82+
}
83+
5684
define void @f32_to_u32(float %f, ptr %dst) {
5785
; CHECK-LABEL: f32_to_u32:
5886
; CHECK: // %bb.0: // %entry
@@ -77,6 +105,19 @@ entry:
77105
ret void
78106
}
79107

108+
define ptr @f32_to_s32_inc(float %f, ptr %dst) {
109+
; CHECK-LABEL: f32_to_s32_inc:
110+
; CHECK: // %bb.0: // %entry
111+
; CHECK-NEXT: fcvtzs s0, s0
112+
; CHECK-NEXT: st1 { v0.s }[0], [x0], #4
113+
; CHECK-NEXT: ret
114+
entry:
115+
%conv = fptosi float %f to i32
116+
%next = getelementptr i32, ptr %dst, i64 1
117+
store i32 %conv, ptr %dst
118+
ret ptr %next
119+
}
120+
80121
define void @f32_to_s64(float %f, ptr %dst) {
81122
; CHECK-LABEL: f32_to_s64:
82123
; CHECK: // %bb.0: // %entry
@@ -115,6 +156,93 @@ entry:
115156
ret void
116157
}
117158

159+
define ptr @f64_to_s64_inc(double %d, ptr %dst) {
160+
; CHECK-LABEL: f64_to_s64_inc:
161+
; CHECK: // %bb.0: // %entry
162+
; CHECK-NEXT: fcvtzs d0, d0
163+
; CHECK-NEXT: st1 { v0.d }[0], [x0], #8
164+
; CHECK-NEXT: ret
165+
entry:
166+
%conv = fptosi double %d to i64
167+
%next = getelementptr i64, ptr %dst, i64 1
168+
store i64 %conv, ptr %dst
169+
ret ptr %next
170+
}
171+
172+
define void @f64_to_u8(double %d, ptr %dst) {
173+
; CHECK-LABEL: f64_to_u8:
174+
; CHECK: // %bb.0:
175+
; CHECK-NEXT: fcvtzu d0, d0
176+
; CHECK-NEXT: str b0, [x0]
177+
; CHECK-NEXT: ret
178+
%conv = fptoui double %d to i64
179+
%trunc = trunc i64 %conv to i8
180+
store i8 %trunc, ptr %dst
181+
ret void
182+
}
183+
184+
define void @f64_to_s8(double %d, ptr %dst) {
185+
; CHECK-LABEL: f64_to_s8:
186+
; CHECK: // %bb.0:
187+
; CHECK-NEXT: fcvtzs d0, d0
188+
; CHECK-NEXT: str b0, [x0]
189+
; CHECK-NEXT: ret
190+
%conv = fptosi double %d to i64
191+
%trunc = trunc i64 %conv to i8
192+
store i8 %trunc, ptr %dst
193+
ret void
194+
}
195+
196+
define ptr @f64_to_s8_inc(double %d, ptr %dst) {
197+
; CHECK-LABEL: f64_to_s8_inc:
198+
; CHECK: // %bb.0:
199+
; CHECK-NEXT: fcvtzs d0, d0
200+
; CHECK-NEXT: st1 { v0.b }[0], [x0], #1
201+
; CHECK-NEXT: ret
202+
%conv = fptosi double %d to i64
203+
%trunc = trunc i64 %conv to i8
204+
store i8 %trunc, ptr %dst
205+
%next = getelementptr i8, ptr %dst, i64 1
206+
ret ptr %next
207+
}
208+
209+
define void @f64_to_u16(double %d, ptr %dst) {
210+
; CHECK-LABEL: f64_to_u16:
211+
; CHECK: // %bb.0:
212+
; CHECK-NEXT: fcvtzu d0, d0
213+
; CHECK-NEXT: str h0, [x0]
214+
; CHECK-NEXT: ret
215+
%conv = fptoui double %d to i64
216+
%trunc = trunc i64 %conv to i16
217+
store i16 %trunc, ptr %dst
218+
ret void
219+
}
220+
221+
define void @f64_to_s16(double %d, ptr %dst) {
222+
; CHECK-LABEL: f64_to_s16:
223+
; CHECK: // %bb.0:
224+
; CHECK-NEXT: fcvtzs d0, d0
225+
; CHECK-NEXT: str h0, [x0]
226+
; CHECK-NEXT: ret
227+
%conv = fptosi double %d to i64
228+
%trunc = trunc i64 %conv to i16
229+
store i16 %trunc, ptr %dst
230+
ret void
231+
}
232+
233+
define ptr @f64_to_s16_inc(double %d, ptr %dst) {
234+
; CHECK-LABEL: f64_to_s16_inc:
235+
; CHECK: // %bb.0:
236+
; CHECK-NEXT: fcvtzs d0, d0
237+
; CHECK-NEXT: st1 { v0.h }[0], [x0], #2
238+
; CHECK-NEXT: ret
239+
%conv = fptosi double %d to i64
240+
%trunc = trunc i64 %conv to i16
241+
%next = getelementptr i16, ptr %dst, i64 1
242+
store i16 %trunc, ptr %dst
243+
ret ptr %next
244+
}
245+
118246
define i32 @f32_to_i32_multiple_uses(float %f, ptr %dst) {
119247
; CHECK-LABEL: f32_to_i32_multiple_uses:
120248
; CHECK: // %bb.0: // %entry

0 commit comments

Comments
 (0)