@@ -4959,15 +4959,15 @@ define amdgpu_ps i64 @s_fshl_i64_5(i64 inreg %lhs, i64 inreg %rhs) {
4959
4959
; GCN: ; %bb.0:
4960
4960
; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
4961
4961
; GCN-NEXT: s_lshr_b32 s2, s3, 27
4962
- ; GCN-NEXT: s_or_b32 s0, s2, s0
4962
+ ; GCN-NEXT: s_or_b32 s0, s0, s2
4963
4963
; GCN-NEXT: ; return to shader part epilog
4964
4964
;
4965
4965
; GFX11-LABEL: s_fshl_i64_5:
4966
4966
; GFX11: ; %bb.0:
4967
4967
; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
4968
4968
; GFX11-NEXT: s_lshr_b32 s2, s3, 27
4969
4969
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4970
- ; GFX11-NEXT: s_or_b32 s0, s2, s0
4970
+ ; GFX11-NEXT: s_or_b32 s0, s0, s2
4971
4971
; GFX11-NEXT: ; return to shader part epilog
4972
4972
%result = call i64 @llvm.fshl.i64 (i64 %lhs , i64 %rhs , i64 5 )
4973
4973
ret i64 %result
@@ -5088,31 +5088,31 @@ define i64 @v_fshl_i64_5(i64 %lhs, i64 %rhs) {
5088
5088
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5089
5089
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 5
5090
5090
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 27, v3
5091
- ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
5091
+ ; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
5092
5092
; GFX6-NEXT: s_setpc_b64 s[30:31]
5093
5093
;
5094
5094
; GFX8-LABEL: v_fshl_i64_5:
5095
5095
; GFX8: ; %bb.0:
5096
5096
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5097
5097
; GFX8-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1]
5098
5098
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 27, v3
5099
- ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
5099
+ ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
5100
5100
; GFX8-NEXT: s_setpc_b64 s[30:31]
5101
5101
;
5102
5102
; GFX9-LABEL: v_fshl_i64_5:
5103
5103
; GFX9: ; %bb.0:
5104
5104
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5105
5105
; GFX9-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1]
5106
5106
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 27, v3
5107
- ; GFX9-NEXT: v_or_b32_e32 v0, v2, v0
5107
+ ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2
5108
5108
; GFX9-NEXT: s_setpc_b64 s[30:31]
5109
5109
;
5110
5110
; GFX10-LABEL: v_fshl_i64_5:
5111
5111
; GFX10: ; %bb.0:
5112
5112
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5113
5113
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1]
5114
5114
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 27, v3
5115
- ; GFX10-NEXT: v_or_b32_e32 v0, v2, v0
5115
+ ; GFX10-NEXT: v_or_b32_e32 v0, v0, v2
5116
5116
; GFX10-NEXT: s_setpc_b64 s[30:31]
5117
5117
;
5118
5118
; GFX11-LABEL: v_fshl_i64_5:
@@ -5121,7 +5121,7 @@ define i64 @v_fshl_i64_5(i64 %lhs, i64 %rhs) {
5121
5121
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1]
5122
5122
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 27, v3
5123
5123
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
5124
- ; GFX11-NEXT: v_or_b32_e32 v0, v2, v0
5124
+ ; GFX11-NEXT: v_or_b32_e32 v0, v0, v2
5125
5125
; GFX11-NEXT: s_setpc_b64 s[30:31]
5126
5126
%result = call i64 @llvm.fshl.i64 (i64 %lhs , i64 %rhs , i64 5 )
5127
5127
ret i64 %result
@@ -6872,7 +6872,7 @@ define amdgpu_ps i128 @s_fshl_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
6872
6872
; GFX6-NEXT: s_lshl_b64 s[4:5], s[6:7], 1
6873
6873
; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
6874
6874
; GFX6-NEXT: s_lshr_b32 s4, s7, 31
6875
- ; GFX6-NEXT: s_or_b32 s2, s4, s2
6875
+ ; GFX6-NEXT: s_or_b32 s2, s2, s4
6876
6876
; GFX6-NEXT: ; return to shader part epilog
6877
6877
;
6878
6878
; GFX8-LABEL: s_fshl_i128_65:
@@ -6883,7 +6883,7 @@ define amdgpu_ps i128 @s_fshl_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
6883
6883
; GFX8-NEXT: s_lshl_b64 s[4:5], s[6:7], 1
6884
6884
; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
6885
6885
; GFX8-NEXT: s_lshr_b32 s4, s7, 31
6886
- ; GFX8-NEXT: s_or_b32 s2, s4, s2
6886
+ ; GFX8-NEXT: s_or_b32 s2, s2, s4
6887
6887
; GFX8-NEXT: ; return to shader part epilog
6888
6888
;
6889
6889
; GFX9-LABEL: s_fshl_i128_65:
@@ -6894,7 +6894,7 @@ define amdgpu_ps i128 @s_fshl_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
6894
6894
; GFX9-NEXT: s_lshl_b64 s[4:5], s[6:7], 1
6895
6895
; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
6896
6896
; GFX9-NEXT: s_lshr_b32 s4, s7, 31
6897
- ; GFX9-NEXT: s_or_b32 s2, s4, s2
6897
+ ; GFX9-NEXT: s_or_b32 s2, s2, s4
6898
6898
; GFX9-NEXT: ; return to shader part epilog
6899
6899
;
6900
6900
; GFX10-LABEL: s_fshl_i128_65:
@@ -6905,7 +6905,7 @@ define amdgpu_ps i128 @s_fshl_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
6905
6905
; GFX10-NEXT: s_lshl_b64 s[4:5], s[6:7], 1
6906
6906
; GFX10-NEXT: s_lshr_b32 s6, s7, 31
6907
6907
; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
6908
- ; GFX10-NEXT: s_or_b32 s2, s6, s2
6908
+ ; GFX10-NEXT: s_or_b32 s2, s2, s6
6909
6909
; GFX10-NEXT: ; return to shader part epilog
6910
6910
;
6911
6911
; GFX11-LABEL: s_fshl_i128_65:
@@ -6916,7 +6916,7 @@ define amdgpu_ps i128 @s_fshl_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
6916
6916
; GFX11-NEXT: s_lshl_b64 s[4:5], s[6:7], 1
6917
6917
; GFX11-NEXT: s_lshr_b32 s6, s7, 31
6918
6918
; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
6919
- ; GFX11-NEXT: s_or_b32 s2, s6, s2
6919
+ ; GFX11-NEXT: s_or_b32 s2, s2, s6
6920
6920
; GFX11-NEXT: ; return to shader part epilog
6921
6921
%result = call i128 @llvm.fshl.i128 (i128 %lhs , i128 %rhs , i128 65 )
6922
6922
ret i128 %result
@@ -6931,7 +6931,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
6931
6931
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v5
6932
6932
; GFX6-NEXT: v_or_b32_e32 v0, v4, v0
6933
6933
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v7
6934
- ; GFX6-NEXT: v_or_b32_e32 v2, v4, v2
6934
+ ; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
6935
6935
; GFX6-NEXT: s_setpc_b64 s[30:31]
6936
6936
;
6937
6937
; GFX8-LABEL: v_fshl_i128_65:
@@ -6942,7 +6942,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
6942
6942
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v5
6943
6943
; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
6944
6944
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v7
6945
- ; GFX8-NEXT: v_or_b32_e32 v2, v4, v2
6945
+ ; GFX8-NEXT: v_or_b32_e32 v2, v2, v4
6946
6946
; GFX8-NEXT: s_setpc_b64 s[30:31]
6947
6947
;
6948
6948
; GFX9-LABEL: v_fshl_i128_65:
@@ -6953,7 +6953,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
6953
6953
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v5
6954
6954
; GFX9-NEXT: v_or_b32_e32 v0, v4, v0
6955
6955
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v7
6956
- ; GFX9-NEXT: v_or_b32_e32 v2, v4, v2
6956
+ ; GFX9-NEXT: v_or_b32_e32 v2, v2, v4
6957
6957
; GFX9-NEXT: s_setpc_b64 s[30:31]
6958
6958
;
6959
6959
; GFX10-LABEL: v_fshl_i128_65:
@@ -6964,7 +6964,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
6964
6964
; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v5
6965
6965
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 31, v7
6966
6966
; GFX10-NEXT: v_or_b32_e32 v0, v4, v0
6967
- ; GFX10-NEXT: v_or_b32_e32 v2, v5, v2
6967
+ ; GFX10-NEXT: v_or_b32_e32 v2, v2, v5
6968
6968
; GFX10-NEXT: s_setpc_b64 s[30:31]
6969
6969
;
6970
6970
; GFX11-LABEL: v_fshl_i128_65:
@@ -6976,7 +6976,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
6976
6976
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 31, v7
6977
6977
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
6978
6978
; GFX11-NEXT: v_or_b32_e32 v0, v4, v0
6979
- ; GFX11-NEXT: v_or_b32_e32 v2, v5, v2
6979
+ ; GFX11-NEXT: v_or_b32_e32 v2, v2, v5
6980
6980
; GFX11-NEXT: s_setpc_b64 s[30:31]
6981
6981
%result = call i128 @llvm.fshl.i128 (i128 %lhs , i128 %rhs , i128 65 )
6982
6982
ret i128 %result
0 commit comments