Skip to content

Commit 547a94f

Browse files
committed
Regenerate bitcast test for upcoming patch.
1 parent 5d86ac8 commit 547a94f

File tree

1 file changed

+138
-42
lines changed

1 file changed

+138
-42
lines changed

llvm/test/CodeGen/AMDGPU/r600.bitcast.ll

Lines changed: 138 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,75 +1,135 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
12
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
23

34
; This test just checks that the compiler doesn't crash.
45

5-
6-
; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:
7-
; EG: MEM_RAT_CACHELESS STORE_RAW [[DATA:T[0-9]+\.XYZW]], [[ST_PTR:T[0-9]+\.[XYZW]]]
8-
; EG: VTX_READ_128 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
9-
; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
10-
; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
116
; Kernel: reinterprets the i8 pointer %in as a pointer to <16 x i8>, loads one
; 16-byte vector through it, and stores that vector to %out.
; The assertions below expect a single VTX_READ_128 fetch into T0.XYZW followed
; by a single MEM_RAT_CACHELESS STORE_RAW of T0.XYZW.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with the script rather than editing by hand.
define amdgpu_kernel void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) {
7+
; EG-LABEL: i8ptr_v16i8ptr:
8+
; EG: ; %bb.0: ; %entry
9+
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
10+
; EG-NEXT: TEX 0 @6
11+
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
12+
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
13+
; EG-NEXT: CF_END
14+
; EG-NEXT: PAD
15+
; EG-NEXT: Fetch clause starting at 6:
16+
; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
17+
; EG-NEXT: ALU clause starting at 8:
18+
; EG-NEXT: MOV * T0.X, KC0[2].Z,
19+
; EG-NEXT: ALU clause starting at 9:
20+
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
21+
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
1222
entry:
1323
%0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)*
1424
%1 = load <16 x i8>, <16 x i8> addrspace(1)* %0
1525
store <16 x i8> %1, <16 x i8> addrspace(1)* %out
1626
ret void
1727
}
1828

19-
; FUNC-LABEL: {{^}}f32_to_v2i16:
20-
; EG: MEM_RAT_CACHELESS STORE_RAW [[DATA:T[0-9]+\.[XYZW]]], [[ST_PTR:T[0-9]+\.[XYZW]]]
21-
; EG: VTX_READ_32 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
22-
; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
23-
; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
2429
; Kernel: loads a float from %in, bitcasts its 32 bits to <2 x i16>, and stores
; the result to %out.
; The assertions below expect one VTX_READ_32 fetch into T0.X and one
; MEM_RAT_CACHELESS STORE_RAW of T0.X, with no extra ALU work for the bitcast
; beyond the address setup (MOV / LSHR).
define amdgpu_kernel void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind {
30+
; EG-LABEL: f32_to_v2i16:
31+
; EG: ; %bb.0:
32+
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
33+
; EG-NEXT: TEX 0 @6
34+
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
35+
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
36+
; EG-NEXT: CF_END
37+
; EG-NEXT: PAD
38+
; EG-NEXT: Fetch clause starting at 6:
39+
; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
40+
; EG-NEXT: ALU clause starting at 8:
41+
; EG-NEXT: MOV * T0.X, KC0[2].Z,
42+
; EG-NEXT: ALU clause starting at 9:
43+
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
44+
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
2545
%load = load float, float addrspace(1)* %in, align 4
2646
%bc = bitcast float %load to <2 x i16>
2747
store <2 x i16> %bc, <2 x i16> addrspace(1)* %out, align 4
2848
ret void
2949
}
3050

31-
; FUNC-LABEL: {{^}}v2i16_to_f32:
32-
; EG: MEM_RAT_CACHELESS STORE_RAW [[DATA:T[0-9]+\.[XYZW]]], [[ST_PTR:T[0-9]+\.[XYZW]]]
33-
; EG: VTX_READ_32 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
34-
; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
35-
; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
3651
; Kernel: loads a <2 x i16> vector from %in, bitcasts it to float, and stores
; the result to %out (the reverse direction of @f32_to_v2i16).
; The assertions below expect one VTX_READ_32 fetch into T0.X and one
; MEM_RAT_CACHELESS STORE_RAW of T0.X.
define amdgpu_kernel void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
52+
; EG-LABEL: v2i16_to_f32:
53+
; EG: ; %bb.0:
54+
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
55+
; EG-NEXT: TEX 0 @6
56+
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
57+
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
58+
; EG-NEXT: CF_END
59+
; EG-NEXT: PAD
60+
; EG-NEXT: Fetch clause starting at 6:
61+
; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
62+
; EG-NEXT: ALU clause starting at 8:
63+
; EG-NEXT: MOV * T0.X, KC0[2].Z,
64+
; EG-NEXT: ALU clause starting at 9:
65+
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
66+
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
3767
%load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
3868
%bc = bitcast <2 x i16> %load to float
3969
store float %bc, float addrspace(1)* %out, align 4
4070
ret void
4171
}
4272

43-
; FUNC-LABEL: {{^}}v4i8_to_i32:
44-
; EG: MEM_RAT_CACHELESS STORE_RAW [[DATA:T[0-9]+\.[XYZW]]], [[ST_PTR:T[0-9]+\.[XYZW]]]
45-
; EG: VTX_READ_32 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
46-
; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
47-
; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
4873
; Kernel: loads a <4 x i8> vector from %in, bitcasts it to a single i32, and
; stores the result to %out.
; The assertions below expect one VTX_READ_32 fetch into T0.X and one
; MEM_RAT_CACHELESS STORE_RAW of T0.X.
define amdgpu_kernel void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind {
74+
; EG-LABEL: v4i8_to_i32:
75+
; EG: ; %bb.0:
76+
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
77+
; EG-NEXT: TEX 0 @6
78+
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
79+
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
80+
; EG-NEXT: CF_END
81+
; EG-NEXT: PAD
82+
; EG-NEXT: Fetch clause starting at 6:
83+
; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
84+
; EG-NEXT: ALU clause starting at 8:
85+
; EG-NEXT: MOV * T0.X, KC0[2].Z,
86+
; EG-NEXT: ALU clause starting at 9:
87+
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
88+
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
4989
%load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4
5090
%bc = bitcast <4 x i8> %load to i32
5191
store i32 %bc, i32 addrspace(1)* %out, align 4
5292
ret void
5393
}
5494

55-
; FUNC-LABEL: {{^}}i32_to_v4i8:
56-
; EG: MEM_RAT_CACHELESS STORE_RAW [[DATA:T[0-9]+\.[XYZW]]], [[ST_PTR:T[0-9]+\.[XYZW]]]
57-
; EG: VTX_READ_32 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
58-
; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
59-
; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
6095
; Kernel: loads an i32 from %in, bitcasts it to <4 x i8>, and stores the result
; to %out (the reverse direction of @v4i8_to_i32).
; The assertions below expect one VTX_READ_32 fetch into T0.X and one
; MEM_RAT_CACHELESS STORE_RAW of T0.X.
define amdgpu_kernel void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
96+
; EG-LABEL: i32_to_v4i8:
97+
; EG: ; %bb.0:
98+
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
99+
; EG-NEXT: TEX 0 @6
100+
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
101+
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
102+
; EG-NEXT: CF_END
103+
; EG-NEXT: PAD
104+
; EG-NEXT: Fetch clause starting at 6:
105+
; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
106+
; EG-NEXT: ALU clause starting at 8:
107+
; EG-NEXT: MOV * T0.X, KC0[2].Z,
108+
; EG-NEXT: ALU clause starting at 9:
109+
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
110+
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
61111
%load = load i32, i32 addrspace(1)* %in, align 4
62112
%bc = bitcast i32 %load to <4 x i8>
63113
store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
64114
ret void
65115
}
66116

67-
; FUNC-LABEL: {{^}}v2i16_to_v4i8:
68-
; EG: MEM_RAT_CACHELESS STORE_RAW [[DATA:T[0-9]+\.[XYZW]]], [[ST_PTR:T[0-9]+\.[XYZW]]]
69-
; EG: VTX_READ_32 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
70-
; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
71-
; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
72117
define amdgpu_kernel void @v2i16_to_v4i8(<4 x i8> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind {
118+
; EG-LABEL: v2i16_to_v4i8:
119+
; EG: ; %bb.0:
120+
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
121+
; EG-NEXT: TEX 0 @6
122+
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
123+
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
124+
; EG-NEXT: CF_END
125+
; EG-NEXT: PAD
126+
; EG-NEXT: Fetch clause starting at 6:
127+
; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
128+
; EG-NEXT: ALU clause starting at 8:
129+
; EG-NEXT: MOV * T0.X, KC0[2].Z,
130+
; EG-NEXT: ALU clause starting at 9:
131+
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
132+
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
73133
%load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4
74134
%bc = bitcast <2 x i16> %load to <4 x i8>
75135
store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4
@@ -79,26 +139,62 @@ define amdgpu_kernel void @v2i16_to_v4i8(<4 x i8> addrspace(1)* %out, <2 x i16>
79139
; This just checks for a crash in the BUILD_VECTOR/EXTRACT_ELEMENT combine;
80140
; the stack manipulation is tricky to follow
81141
; TODO: This should only use one load
82-
; FUNC-LABEL: {{^}}v4i16_extract_i8:
83-
; EG: MEM_RAT MSKOR {{T[0-9]+\.XW}}, [[ST_PTR:T[0-9]+\.[XYZW]]]
84-
; EG: VTX_READ_16
85-
; EG: VTX_READ_16
86-
; EG-DAG: BFE_UINT
87-
; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
88142
; Kernel: loads a <4 x i16> vector from %in (align 2), bitcasts it to <8 x i8>,
; extracts element 5, and stores that single byte to %out.
; The assertions below currently expect two VTX_READ_16 fetches (with offset
; operands 6 and 4), an ALU clause that combines them and isolates the byte
; with BFE_UINT, and a masked MEM_RAT MSKOR store of T5.XW.
define amdgpu_kernel void @v4i16_extract_i8(i8 addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind {
143+
; EG-LABEL: v4i16_extract_i8:
144+
; EG: ; %bb.0:
145+
; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
146+
; EG-NEXT: TEX 1 @6
147+
; EG-NEXT: ALU 17, @11, KC0[CB0:0-32], KC1[]
148+
; EG-NEXT: MEM_RAT MSKOR T5.XW, T6.X
149+
; EG-NEXT: CF_END
150+
; EG-NEXT: PAD
151+
; EG-NEXT: Fetch clause starting at 6:
152+
; EG-NEXT: VTX_READ_16 T6.X, T5.X, 6, #1
153+
; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
154+
; EG-NEXT: ALU clause starting at 10:
155+
; EG-NEXT: MOV * T5.X, KC0[2].Z,
156+
; EG-NEXT: ALU clause starting at 11:
157+
; EG-NEXT: LSHL * T0.W, T6.X, literal.x,
158+
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
159+
; EG-NEXT: OR_INT * T0.W, PV.W, T5.X,
160+
; EG-NEXT: MOV * T3.X, PV.W,
161+
; EG-NEXT: MOV T0.Y, PV.X,
162+
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
163+
; EG-NEXT: MOV * T1.W, literal.y,
164+
; EG-NEXT: 3(4.203895e-45), 8(1.121039e-44)
165+
; EG-NEXT: BFE_UINT T1.W, PV.Y, literal.x, PS,
166+
; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
167+
; EG-NEXT: 8(1.121039e-44), 3(4.203895e-45)
168+
; EG-NEXT: LSHL T5.X, PV.W, PS,
169+
; EG-NEXT: LSHL * T5.W, literal.x, PS,
170+
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
171+
; EG-NEXT: MOV T5.Y, 0.0,
172+
; EG-NEXT: MOV * T5.Z, 0.0,
173+
; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.x,
174+
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
89175
%load = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 2
90176
%bc = bitcast <4 x i16> %load to <8 x i8>
91177
%element = extractelement <8 x i8> %bc, i32 5
92178
store i8 %element, i8 addrspace(1)* %out
93179
ret void
94180
}
95181

96-
; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
97-
; EG: MEM_RAT_CACHELESS STORE_RAW [[DATA:T[0-9]+\.XY]], [[ST_PTR:T[0-9]+\.[XYZW]]]
98-
; EG: VTX_READ_64 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
99-
; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
100-
; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
101182
define amdgpu_kernel void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
183+
; EG-LABEL: bitcast_v2i32_to_f64:
184+
; EG: ; %bb.0:
185+
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
186+
; EG-NEXT: TEX 0 @6
187+
; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
188+
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
189+
; EG-NEXT: CF_END
190+
; EG-NEXT: PAD
191+
; EG-NEXT: Fetch clause starting at 6:
192+
; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
193+
; EG-NEXT: ALU clause starting at 8:
194+
; EG-NEXT: MOV * T0.X, KC0[2].Z,
195+
; EG-NEXT: ALU clause starting at 9:
196+
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
197+
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
102198
%val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8
103199
%bc = bitcast <2 x i32> %val to double
104200
store double %bc, double addrspace(1)* %out, align 8

0 commit comments

Comments
 (0)