@@ -4959,17 +4959,15 @@ define amdgpu_ps i64 @s_fshl_i64_5(i64 inreg %lhs, i64 inreg %rhs) {
4959
4959
; GCN: ; %bb.0:
4960
4960
; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
4961
4961
; GCN-NEXT: s_lshr_b32 s2, s3, 27
4962
- ; GCN-NEXT: s_mov_b32 s3, 0
4963
- ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
4962
+ ; GCN-NEXT: s_or_b32 s0, s0, s2
4964
4963
; GCN-NEXT: ; return to shader part epilog
4965
4964
;
4966
4965
; GFX11-LABEL: s_fshl_i64_5:
4967
4966
; GFX11: ; %bb.0:
4968
4967
; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 5
4969
4968
; GFX11-NEXT: s_lshr_b32 s2, s3, 27
4970
- ; GFX11-NEXT: s_mov_b32 s3, 0
4971
4969
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4972
- ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
4970
+ ; GFX11-NEXT: s_or_b32 s0, s0, s2
4973
4971
; GFX11-NEXT: ; return to shader part epilog
4974
4972
%result = call i64 @llvm.fshl.i64 (i64 %lhs , i64 %rhs , i64 5 )
4975
4973
ret i64 %result
@@ -4979,20 +4977,13 @@ define amdgpu_ps i64 @s_fshl_i64_32(i64 inreg %lhs, i64 inreg %rhs) {
4979
4977
; GCN-LABEL: s_fshl_i64_32:
4980
4978
; GCN: ; %bb.0:
4981
4979
; GCN-NEXT: s_mov_b32 s1, s0
4982
- ; GCN-NEXT: s_mov_b32 s0, 0
4983
- ; GCN-NEXT: s_mov_b32 s2, s3
4984
- ; GCN-NEXT: s_mov_b32 s3, s0
4985
- ; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
4980
+ ; GCN-NEXT: s_mov_b32 s0, s3
4986
4981
; GCN-NEXT: ; return to shader part epilog
4987
4982
;
4988
4983
; GFX11-LABEL: s_fshl_i64_32:
4989
4984
; GFX11: ; %bb.0:
4990
4985
; GFX11-NEXT: s_mov_b32 s1, s0
4991
- ; GFX11-NEXT: s_mov_b32 s0, 0
4992
- ; GFX11-NEXT: s_mov_b32 s2, s3
4993
- ; GFX11-NEXT: s_mov_b32 s3, s0
4994
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
4995
- ; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
4986
+ ; GFX11-NEXT: s_mov_b32 s0, s3
4996
4987
; GFX11-NEXT: ; return to shader part epilog
4997
4988
%result = call i64 @llvm.fshl.i64 (i64 %lhs , i64 %rhs , i64 32 )
4998
4989
ret i64 %result
@@ -6877,56 +6868,50 @@ define amdgpu_ps i128 @s_fshl_i128_65(i128 inreg %lhs, i128 inreg %rhs) {
6877
6868
; GFX6: ; %bb.0:
6878
6869
; GFX6-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
6879
6870
; GFX6-NEXT: s_lshr_b32 s4, s5, 31
6880
- ; GFX6-NEXT: s_mov_b32 s5, 0
6881
6871
; GFX6-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
6882
- ; GFX6-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
6872
+ ; GFX6-NEXT: s_or_b32 s0, s0, s4
6883
6873
; GFX6-NEXT: s_lshr_b32 s4, s7, 31
6884
- ; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
6874
+ ; GFX6-NEXT: s_or_b32 s2, s2, s4
6885
6875
; GFX6-NEXT: ; return to shader part epilog
6886
6876
;
6887
6877
; GFX8-LABEL: s_fshl_i128_65:
6888
6878
; GFX8: ; %bb.0:
6889
6879
; GFX8-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
6890
6880
; GFX8-NEXT: s_lshr_b32 s4, s5, 31
6891
- ; GFX8-NEXT: s_mov_b32 s5, 0
6892
6881
; GFX8-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
6893
- ; GFX8-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
6882
+ ; GFX8-NEXT: s_or_b32 s0, s0, s4
6894
6883
; GFX8-NEXT: s_lshr_b32 s4, s7, 31
6895
- ; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
6884
+ ; GFX8-NEXT: s_or_b32 s2, s2, s4
6896
6885
; GFX8-NEXT: ; return to shader part epilog
6897
6886
;
6898
6887
; GFX9-LABEL: s_fshl_i128_65:
6899
6888
; GFX9: ; %bb.0:
6900
6889
; GFX9-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
6901
6890
; GFX9-NEXT: s_lshr_b32 s4, s5, 31
6902
- ; GFX9-NEXT: s_mov_b32 s5, 0
6903
6891
; GFX9-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
6904
- ; GFX9-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
6892
+ ; GFX9-NEXT: s_or_b32 s0, s0, s4
6905
6893
; GFX9-NEXT: s_lshr_b32 s4, s7, 31
6906
- ; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
6894
+ ; GFX9-NEXT: s_or_b32 s2, s2, s4
6907
6895
; GFX9-NEXT: ; return to shader part epilog
6908
6896
;
6909
6897
; GFX10-LABEL: s_fshl_i128_65:
6910
6898
; GFX10: ; %bb.0:
6911
- ; GFX10-NEXT: s_lshr_b32 s2, s5, 31
6912
- ; GFX10-NEXT: s_mov_b32 s3, 0
6913
- ; GFX10-NEXT: s_lshl_b64 s[4:5], s[6:7], 1
6914
- ; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], 1
6915
- ; GFX10-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
6916
- ; GFX10-NEXT: s_lshr_b32 s2, s7, 31
6917
- ; GFX10-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
6899
+ ; GFX10-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
6900
+ ; GFX10-NEXT: s_lshr_b32 s4, s5, 31
6901
+ ; GFX10-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
6902
+ ; GFX10-NEXT: s_lshr_b32 s5, s7, 31
6903
+ ; GFX10-NEXT: s_or_b32 s0, s0, s4
6904
+ ; GFX10-NEXT: s_or_b32 s2, s2, s5
6918
6905
; GFX10-NEXT: ; return to shader part epilog
6919
6906
;
6920
6907
; GFX11-LABEL: s_fshl_i128_65:
6921
6908
; GFX11: ; %bb.0:
6922
- ; GFX11-NEXT: s_lshr_b32 s2, s5, 31
6923
- ; GFX11-NEXT: s_mov_b32 s3, 0
6924
- ; GFX11-NEXT: s_lshl_b64 s[4:5], s[6:7], 1
6925
- ; GFX11-NEXT: s_lshl_b64 s[8:9], s[0:1], 1
6926
- ; GFX11-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5]
6927
- ; GFX11-NEXT: s_lshr_b32 s2, s7, 31
6928
- ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
6929
- ; GFX11-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3]
6909
+ ; GFX11-NEXT: s_lshl_b64 s[2:3], s[0:1], 1
6910
+ ; GFX11-NEXT: s_lshr_b32 s4, s5, 31
6911
+ ; GFX11-NEXT: s_lshl_b64 s[0:1], s[6:7], 1
6912
+ ; GFX11-NEXT: s_lshr_b32 s5, s7, 31
6913
+ ; GFX11-NEXT: s_or_b32 s0, s0, s4
6914
+ ; GFX11-NEXT: s_or_b32 s2, s2, s5
6930
6915
; GFX11-NEXT: ; return to shader part epilog
6931
6916
%result = call i128 @llvm.fshl.i128 (i128 %lhs , i128 %rhs , i128 65 )
6932
6917
ret i128 %result
@@ -6939,7 +6924,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
6939
6924
; GFX6-NEXT: v_lshl_b64 v[2:3], v[0:1], 1
6940
6925
; GFX6-NEXT: v_lshl_b64 v[0:1], v[6:7], 1
6941
6926
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v5
6942
- ; GFX6-NEXT: v_or_b32_e32 v0, v4, v0
6927
+ ; GFX6-NEXT: v_or_b32_e32 v0, v0, v4
6943
6928
; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v7
6944
6929
; GFX6-NEXT: v_or_b32_e32 v2, v2, v4
6945
6930
; GFX6-NEXT: s_setpc_b64 s[30:31]
@@ -6950,7 +6935,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
6950
6935
; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
6951
6936
; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
6952
6937
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v5
6953
- ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
6938
+ ; GFX8-NEXT: v_or_b32_e32 v0, v0, v4
6954
6939
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v7
6955
6940
; GFX8-NEXT: v_or_b32_e32 v2, v2, v4
6956
6941
; GFX8-NEXT: s_setpc_b64 s[30:31]
@@ -6961,7 +6946,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
6961
6946
; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
6962
6947
; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
6963
6948
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v5
6964
- ; GFX9-NEXT: v_or_b32_e32 v0, v4, v0
6949
+ ; GFX9-NEXT: v_or_b32_e32 v0, v0, v4
6965
6950
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v7
6966
6951
; GFX9-NEXT: v_or_b32_e32 v2, v2, v4
6967
6952
; GFX9-NEXT: s_setpc_b64 s[30:31]
@@ -6973,7 +6958,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
6973
6958
; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7]
6974
6959
; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v5
6975
6960
; GFX10-NEXT: v_lshrrev_b32_e32 v5, 31, v7
6976
- ; GFX10-NEXT: v_or_b32_e32 v0, v4, v0
6961
+ ; GFX10-NEXT: v_or_b32_e32 v0, v0, v4
6977
6962
; GFX10-NEXT: v_or_b32_e32 v2, v2, v5
6978
6963
; GFX10-NEXT: s_setpc_b64 s[30:31]
6979
6964
;
@@ -6985,7 +6970,7 @@ define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) {
6985
6970
; GFX11-NEXT: v_lshrrev_b32_e32 v4, 31, v5
6986
6971
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 31, v7
6987
6972
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
6988
- ; GFX11-NEXT: v_or_b32_e32 v0, v4, v0
6973
+ ; GFX11-NEXT: v_or_b32_e32 v0, v0, v4
6989
6974
; GFX11-NEXT: v_or_b32_e32 v2, v2, v5
6990
6975
; GFX11-NEXT: s_setpc_b64 s[30:31]
6991
6976
%result = call i128 @llvm.fshl.i128 (i128 %lhs , i128 %rhs , i128 65 )
0 commit comments