1
+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
1
2
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
2
3
3
4
; This test checks that the compiler doesn't crash on these bitcasts; the assertions below also pin the generated code.
4
5
5
-
6
- ; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:
7
- ; EG: MEM_RAT_CACHELESS STORE_RAW [[DATA:T[0-9]+\.XYZW]], [[ST_PTR:T[0-9]+\.[XYZW]]]
8
- ; EG: VTX_READ_128 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
9
- ; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
10
- ; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
11
6
; Kernel under test: reinterprets the i8 pointer %in as a pointer to
; <16 x i8>, loads one vector through it and stores that vector to %out.
; The bitcast here is applied to the pointer, not to a loaded value.
; The expected output has a single VTX_READ_128 feeding a single
; MEM_RAT_CACHELESS STORE_RAW of T0.XYZW. The only ALU work is a MOV that
; sets T0.X (used by the read) and an LSHR by 2 that sets T1.X (used by the
; store), both taking KC0[2] operands.
define amdgpu_kernel void @i8ptr_v16i8ptr (<16 x i8 > addrspace (1 )* %out , i8 addrspace (1 )* %in ) {
7
+ ; EG-LABEL: i8ptr_v16i8ptr:
8
+ ; EG: ; %bb.0: ; %entry
9
+ ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
10
+ ; EG-NEXT: TEX 0 @6
11
+ ; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
12
+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
13
+ ; EG-NEXT: CF_END
14
+ ; EG-NEXT: PAD
15
+ ; EG-NEXT: Fetch clause starting at 6:
16
+ ; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
17
+ ; EG-NEXT: ALU clause starting at 8:
18
+ ; EG-NEXT: MOV * T0.X, KC0[2].Z,
19
+ ; EG-NEXT: ALU clause starting at 9:
20
+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
21
+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
12
22
entry:
13
23
; Pointer cast only: same address space (1), pointee changes from i8 to <16 x i8>.
%0 = bitcast i8 addrspace (1 )* %in to <16 x i8 > addrspace (1 )*
14
24
%1 = load <16 x i8 >, <16 x i8 > addrspace (1 )* %0
15
25
store <16 x i8 > %1 , <16 x i8 > addrspace (1 )* %out
16
26
ret void
17
27
}
18
28
19
- ; FUNC-LABEL: {{^}}f32_to_v2i16:
20
- ; EG: MEM_RAT_CACHELESS STORE_RAW [[DATA:T[0-9]+\.[XYZW]]], [[ST_PTR:T[0-9]+\.[XYZW]]]
21
- ; EG: VTX_READ_32 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
22
- ; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
23
- ; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
24
29
; Kernel under test: loads a float from %in (align 4), bitcasts the loaded
; value to <2 x i16> and stores the result to %out (align 4).
; The expected output is one VTX_READ_32 into T0.X followed by one
; MEM_RAT_CACHELESS STORE_RAW of T0.X; the value bitcast itself shows up as
; no extra ALU instructions beyond the MOV and LSHR that set T0.X and T1.X.
define amdgpu_kernel void @f32_to_v2i16 (<2 x i16 > addrspace (1 )* %out , float addrspace (1 )* %in ) nounwind {
30
+ ; EG-LABEL: f32_to_v2i16:
31
+ ; EG: ; %bb.0:
32
+ ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
33
+ ; EG-NEXT: TEX 0 @6
34
+ ; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
35
+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
36
+ ; EG-NEXT: CF_END
37
+ ; EG-NEXT: PAD
38
+ ; EG-NEXT: Fetch clause starting at 6:
39
+ ; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
40
+ ; EG-NEXT: ALU clause starting at 8:
41
+ ; EG-NEXT: MOV * T0.X, KC0[2].Z,
42
+ ; EG-NEXT: ALU clause starting at 9:
43
+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
44
+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
25
45
%load = load float , float addrspace (1 )* %in , align 4
26
46
; Value bitcast: scalar float reinterpreted as a two-element i16 vector.
%bc = bitcast float %load to <2 x i16 >
27
47
store <2 x i16 > %bc , <2 x i16 > addrspace (1 )* %out , align 4
28
48
ret void
29
49
}
30
50
31
- ; FUNC-LABEL: {{^}}v2i16_to_f32:
32
- ; EG: MEM_RAT_CACHELESS STORE_RAW [[DATA:T[0-9]+\.[XYZW]]], [[ST_PTR:T[0-9]+\.[XYZW]]]
33
- ; EG: VTX_READ_32 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
34
- ; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
35
- ; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
36
51
; Kernel under test: loads a <2 x i16> from %in (align 4), bitcasts the
; loaded value to float and stores the result to %out (align 4).
; The expected output is one VTX_READ_32 into T0.X followed by one
; MEM_RAT_CACHELESS STORE_RAW of T0.X; the only ALU instructions are the MOV
; and LSHR that set T0.X and T1.X from KC0[2] operands.
define amdgpu_kernel void @v2i16_to_f32 (float addrspace (1 )* %out , <2 x i16 > addrspace (1 )* %in ) nounwind {
52
+ ; EG-LABEL: v2i16_to_f32:
53
+ ; EG: ; %bb.0:
54
+ ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
55
+ ; EG-NEXT: TEX 0 @6
56
+ ; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
57
+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
58
+ ; EG-NEXT: CF_END
59
+ ; EG-NEXT: PAD
60
+ ; EG-NEXT: Fetch clause starting at 6:
61
+ ; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
62
+ ; EG-NEXT: ALU clause starting at 8:
63
+ ; EG-NEXT: MOV * T0.X, KC0[2].Z,
64
+ ; EG-NEXT: ALU clause starting at 9:
65
+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
66
+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
37
67
%load = load <2 x i16 >, <2 x i16 > addrspace (1 )* %in , align 4
38
68
; Value bitcast: two-element i16 vector reinterpreted as a scalar float.
%bc = bitcast <2 x i16 > %load to float
39
69
store float %bc , float addrspace (1 )* %out , align 4
40
70
ret void
41
71
}
42
72
43
- ; FUNC-LABEL: {{^}}v4i8_to_i32:
44
- ; EG: MEM_RAT_CACHELESS STORE_RAW [[DATA:T[0-9]+\.[XYZW]]], [[ST_PTR:T[0-9]+\.[XYZW]]]
45
- ; EG: VTX_READ_32 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
46
- ; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
47
- ; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
48
73
; Kernel under test: loads a <4 x i8> from %in (align 4), bitcasts the
; loaded value to i32 and stores the result to %out (align 4).
; The expected output is one VTX_READ_32 into T0.X followed by one
; MEM_RAT_CACHELESS STORE_RAW of T0.X; the only ALU instructions are the MOV
; and LSHR that set T0.X and T1.X from KC0[2] operands.
define amdgpu_kernel void @v4i8_to_i32 (i32 addrspace (1 )* %out , <4 x i8 > addrspace (1 )* %in ) nounwind {
74
+ ; EG-LABEL: v4i8_to_i32:
75
+ ; EG: ; %bb.0:
76
+ ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
77
+ ; EG-NEXT: TEX 0 @6
78
+ ; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
79
+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
80
+ ; EG-NEXT: CF_END
81
+ ; EG-NEXT: PAD
82
+ ; EG-NEXT: Fetch clause starting at 6:
83
+ ; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
84
+ ; EG-NEXT: ALU clause starting at 8:
85
+ ; EG-NEXT: MOV * T0.X, KC0[2].Z,
86
+ ; EG-NEXT: ALU clause starting at 9:
87
+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
88
+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
49
89
%load = load <4 x i8 >, <4 x i8 > addrspace (1 )* %in , align 4
50
90
; Value bitcast: four-element i8 vector reinterpreted as a scalar i32.
%bc = bitcast <4 x i8 > %load to i32
51
91
store i32 %bc , i32 addrspace (1 )* %out , align 4
52
92
ret void
53
93
}
54
94
55
- ; FUNC-LABEL: {{^}}i32_to_v4i8:
56
- ; EG: MEM_RAT_CACHELESS STORE_RAW [[DATA:T[0-9]+\.[XYZW]]], [[ST_PTR:T[0-9]+\.[XYZW]]]
57
- ; EG: VTX_READ_32 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
58
- ; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
59
- ; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
60
95
; Kernel under test: loads an i32 from %in (align 4), bitcasts the loaded
; value to <4 x i8> and stores the result to %out (align 4).
; The expected output is one VTX_READ_32 into T0.X followed by one
; MEM_RAT_CACHELESS STORE_RAW of T0.X; the only ALU instructions are the MOV
; and LSHR that set T0.X and T1.X from KC0[2] operands.
define amdgpu_kernel void @i32_to_v4i8 (<4 x i8 > addrspace (1 )* %out , i32 addrspace (1 )* %in ) nounwind {
96
+ ; EG-LABEL: i32_to_v4i8:
97
+ ; EG: ; %bb.0:
98
+ ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
99
+ ; EG-NEXT: TEX 0 @6
100
+ ; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
101
+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
102
+ ; EG-NEXT: CF_END
103
+ ; EG-NEXT: PAD
104
+ ; EG-NEXT: Fetch clause starting at 6:
105
+ ; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
106
+ ; EG-NEXT: ALU clause starting at 8:
107
+ ; EG-NEXT: MOV * T0.X, KC0[2].Z,
108
+ ; EG-NEXT: ALU clause starting at 9:
109
+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
110
+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
61
111
%load = load i32 , i32 addrspace (1 )* %in , align 4
62
112
; Value bitcast: scalar i32 reinterpreted as a four-element i8 vector.
%bc = bitcast i32 %load to <4 x i8 >
63
113
store <4 x i8 > %bc , <4 x i8 > addrspace (1 )* %out , align 4
64
114
ret void
65
115
}
66
116
67
- ; FUNC-LABEL: {{^}}v2i16_to_v4i8:
68
- ; EG: MEM_RAT_CACHELESS STORE_RAW [[DATA:T[0-9]+\.[XYZW]]], [[ST_PTR:T[0-9]+\.[XYZW]]]
69
- ; EG: VTX_READ_32 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
70
- ; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
71
- ; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
72
117
define amdgpu_kernel void @v2i16_to_v4i8 (<4 x i8 > addrspace (1 )* %out , <2 x i16 > addrspace (1 )* %in ) nounwind {
118
+ ; EG-LABEL: v2i16_to_v4i8:
119
+ ; EG: ; %bb.0:
120
+ ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
121
+ ; EG-NEXT: TEX 0 @6
122
+ ; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
123
+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
124
+ ; EG-NEXT: CF_END
125
+ ; EG-NEXT: PAD
126
+ ; EG-NEXT: Fetch clause starting at 6:
127
+ ; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1
128
+ ; EG-NEXT: ALU clause starting at 8:
129
+ ; EG-NEXT: MOV * T0.X, KC0[2].Z,
130
+ ; EG-NEXT: ALU clause starting at 9:
131
+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
132
+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
73
133
%load = load <2 x i16 >, <2 x i16 > addrspace (1 )* %in , align 4
74
134
%bc = bitcast <2 x i16 > %load to <4 x i8 >
75
135
store <4 x i8 > %bc , <4 x i8 > addrspace (1 )* %out , align 4
@@ -79,26 +139,62 @@ define amdgpu_kernel void @v2i16_to_v4i8(<4 x i8> addrspace(1)* %out, <2 x i16>
79
139
; This just checks for a crash in the BUILD_VECTOR/EXTRACT_ELEMENT combine;
80
140
; the stack manipulation is tricky to follow
81
141
; TODO: This should only use one load; the expected output currently has two VTX_READ_16.
82
- ; FUNC-LABEL: {{^}}v4i16_extract_i8:
83
- ; EG: MEM_RAT MSKOR {{T[0-9]+\.XW}}, [[ST_PTR:T[0-9]+\.[XYZW]]]
84
- ; EG: VTX_READ_16
85
- ; EG: VTX_READ_16
86
- ; EG-DAG: BFE_UINT
87
- ; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
88
142
; Kernel under test: loads a <4 x i16> from %in with only 2-byte alignment,
; bitcasts the loaded value to <8 x i8>, extracts element 5 and stores that
; single i8 to %out.
; The expected output has two VTX_READ_16 fetches from T5.X (trailing operands
; 6 and 4, presumably byte offsets - confirm against the R600 docs) and
; a MEM_RAT MSKOR store of T5.XW through T6.X. The ALU clause at 11 combines
; the two halves (LSHL by 16, OR_INT), extracts a field with BFE_UINT, and
; builds T5.X and T5.W with shifts whose amount derives from KC0[2].Y AND 3.
define amdgpu_kernel void @v4i16_extract_i8 (i8 addrspace (1 )* %out , <4 x i16 > addrspace (1 )* %in ) nounwind {
143
+ ; EG-LABEL: v4i16_extract_i8:
144
+ ; EG: ; %bb.0:
145
+ ; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[]
146
+ ; EG-NEXT: TEX 1 @6
147
+ ; EG-NEXT: ALU 17, @11, KC0[CB0:0-32], KC1[]
148
+ ; EG-NEXT: MEM_RAT MSKOR T5.XW, T6.X
149
+ ; EG-NEXT: CF_END
150
+ ; EG-NEXT: PAD
151
+ ; EG-NEXT: Fetch clause starting at 6:
152
+ ; EG-NEXT: VTX_READ_16 T6.X, T5.X, 6, #1
153
+ ; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1
154
+ ; EG-NEXT: ALU clause starting at 10:
155
+ ; EG-NEXT: MOV * T5.X, KC0[2].Z,
156
+ ; EG-NEXT: ALU clause starting at 11:
157
+ ; EG-NEXT: LSHL * T0.W, T6.X, literal.x,
158
+ ; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
159
+ ; EG-NEXT: OR_INT * T0.W, PV.W, T5.X,
160
+ ; EG-NEXT: MOV * T3.X, PV.W,
161
+ ; EG-NEXT: MOV T0.Y, PV.X,
162
+ ; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
163
+ ; EG-NEXT: MOV * T1.W, literal.y,
164
+ ; EG-NEXT: 3(4.203895e-45), 8(1.121039e-44)
165
+ ; EG-NEXT: BFE_UINT T1.W, PV.Y, literal.x, PS,
166
+ ; EG-NEXT: LSHL * T0.W, PV.W, literal.y,
167
+ ; EG-NEXT: 8(1.121039e-44), 3(4.203895e-45)
168
+ ; EG-NEXT: LSHL T5.X, PV.W, PS,
169
+ ; EG-NEXT: LSHL * T5.W, literal.x, PS,
170
+ ; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
171
+ ; EG-NEXT: MOV T5.Y, 0.0,
172
+ ; EG-NEXT: MOV * T5.Z, 0.0,
173
+ ; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.x,
174
+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
89
175
%load = load <4 x i16 >, <4 x i16 > addrspace (1 )* %in , align 2
90
176
; Value bitcast: same 64 bits viewed as eight i8 lanes instead of four i16 lanes.
%bc = bitcast <4 x i16 > %load to <8 x i8 >
91
177
; Only lane 5 of the eight i8 lanes is used.
%element = extractelement <8 x i8 > %bc , i32 5
92
178
store i8 %element , i8 addrspace (1 )* %out
93
179
ret void
94
180
}
95
181
96
- ; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
97
- ; EG: MEM_RAT_CACHELESS STORE_RAW [[DATA:T[0-9]+\.XY]], [[ST_PTR:T[0-9]+\.[XYZW]]]
98
- ; EG: VTX_READ_64 [[DATA]], [[LD_PTR:T[0-9]+\.[XYZW]]]
99
- ; EG-DAG: MOV {{[\* ]*}}[[LD_PTR]], KC0[2].Z
100
- ; EG-DAG: LSHR {{[\* ]*}}[[ST_PTR]], KC0[2].Y, literal
101
182
define amdgpu_kernel void @bitcast_v2i32_to_f64 (double addrspace (1 )* %out , <2 x i32 > addrspace (1 )* %in ) {
183
+ ; EG-LABEL: bitcast_v2i32_to_f64:
184
+ ; EG: ; %bb.0:
185
+ ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
186
+ ; EG-NEXT: TEX 0 @6
187
+ ; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[]
188
+ ; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
189
+ ; EG-NEXT: CF_END
190
+ ; EG-NEXT: PAD
191
+ ; EG-NEXT: Fetch clause starting at 6:
192
+ ; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1
193
+ ; EG-NEXT: ALU clause starting at 8:
194
+ ; EG-NEXT: MOV * T0.X, KC0[2].Z,
195
+ ; EG-NEXT: ALU clause starting at 9:
196
+ ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
197
+ ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
102
198
%val = load <2 x i32 >, <2 x i32 > addrspace (1 )* %in , align 8
103
199
%bc = bitcast <2 x i32 > %val to double
104
200
store double %bc , double addrspace (1 )* %out , align 8
0 commit comments