@@ -929,6 +929,42 @@ void test_cvt_scalef32_pk(global uint2 *out2, bfloat8 srcbf8, half8 srch8, float
929
929
// CHECK-NEXT: [[TMP43:%.*]] = call i32 @llvm.amdgcn.cvt.scalef32.sr.pk8.fp4.bf16(<8 x bfloat> [[TMP40]], i32 [[TMP41]], float [[TMP42]])
930
930
// CHECK-NEXT: [[TMP44:%.*]] = load ptr addrspace(1), ptr [[OUT1_ADDR_ASCAST]], align 8
931
931
// CHECK-NEXT: store i32 [[TMP43]], ptr addrspace(1) [[TMP44]], align 4
932
+ // CHECK-NEXT: [[TMP45:%.*]] = load <16 x bfloat>, ptr [[SRCBF16_ADDR_ASCAST]], align 32
933
+ // CHECK-NEXT: [[TMP46:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
934
+ // CHECK-NEXT: [[TMP47:%.*]] = load float, ptr [[SCALE_ADDR_ASCAST]], align 4
935
+ // CHECK-NEXT: [[TMP48:%.*]] = call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.bf6.bf16(<16 x bfloat> [[TMP45]], i32 [[TMP46]], float [[TMP47]])
936
+ // CHECK-NEXT: [[TMP49:%.*]] = load ptr addrspace(1), ptr [[OUT3_ADDR_ASCAST]], align 8
937
+ // CHECK-NEXT: store <3 x i32> [[TMP48]], ptr addrspace(1) [[TMP49]], align 16
938
+ // CHECK-NEXT: [[TMP50:%.*]] = load <16 x half>, ptr [[SRCH16_ADDR_ASCAST]], align 32
939
+ // CHECK-NEXT: [[TMP51:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
940
+ // CHECK-NEXT: [[TMP52:%.*]] = load float, ptr [[SCALE_ADDR_ASCAST]], align 4
941
+ // CHECK-NEXT: [[TMP53:%.*]] = call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.bf6.f16(<16 x half> [[TMP50]], i32 [[TMP51]], float [[TMP52]])
942
+ // CHECK-NEXT: [[TMP54:%.*]] = load ptr addrspace(1), ptr [[OUT3_ADDR_ASCAST]], align 8
943
+ // CHECK-NEXT: store <3 x i32> [[TMP53]], ptr addrspace(1) [[TMP54]], align 16
944
+ // CHECK-NEXT: [[TMP55:%.*]] = load <16 x bfloat>, ptr [[SRCBF16_ADDR_ASCAST]], align 32
945
+ // CHECK-NEXT: [[TMP56:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
946
+ // CHECK-NEXT: [[TMP57:%.*]] = load float, ptr [[SCALE_ADDR_ASCAST]], align 4
947
+ // CHECK-NEXT: [[TMP58:%.*]] = call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.fp6.bf16(<16 x bfloat> [[TMP55]], i32 [[TMP56]], float [[TMP57]])
948
+ // CHECK-NEXT: [[TMP59:%.*]] = load ptr addrspace(1), ptr [[OUT3_ADDR_ASCAST]], align 8
949
+ // CHECK-NEXT: store <3 x i32> [[TMP58]], ptr addrspace(1) [[TMP59]], align 16
950
+ // CHECK-NEXT: [[TMP60:%.*]] = load <16 x half>, ptr [[SRCH16_ADDR_ASCAST]], align 32
951
+ // CHECK-NEXT: [[TMP61:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
952
+ // CHECK-NEXT: [[TMP62:%.*]] = load float, ptr [[SCALE_ADDR_ASCAST]], align 4
953
+ // CHECK-NEXT: [[TMP63:%.*]] = call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.fp6.f16(<16 x half> [[TMP60]], i32 [[TMP61]], float [[TMP62]])
954
+ // CHECK-NEXT: [[TMP64:%.*]] = load ptr addrspace(1), ptr [[OUT3_ADDR_ASCAST]], align 8
955
+ // CHECK-NEXT: store <3 x i32> [[TMP63]], ptr addrspace(1) [[TMP64]], align 16
956
+ // CHECK-NEXT: [[TMP65:%.*]] = load <16 x float>, ptr [[SRCF16_ADDR_ASCAST]], align 64
957
+ // CHECK-NEXT: [[TMP66:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
958
+ // CHECK-NEXT: [[TMP67:%.*]] = load float, ptr [[SCALE_ADDR_ASCAST]], align 4
959
+ // CHECK-NEXT: [[TMP68:%.*]] = call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.bf6.f32(<16 x float> [[TMP65]], i32 [[TMP66]], float [[TMP67]])
960
+ // CHECK-NEXT: [[TMP69:%.*]] = load ptr addrspace(1), ptr [[OUT3_ADDR_ASCAST]], align 8
961
+ // CHECK-NEXT: store <3 x i32> [[TMP68]], ptr addrspace(1) [[TMP69]], align 16
962
+ // CHECK-NEXT: [[TMP70:%.*]] = load <16 x float>, ptr [[SRCF16_ADDR_ASCAST]], align 64
963
+ // CHECK-NEXT: [[TMP71:%.*]] = load i32, ptr [[SR_ADDR_ASCAST]], align 4
964
+ // CHECK-NEXT: [[TMP72:%.*]] = load float, ptr [[SCALE_ADDR_ASCAST]], align 4
965
+ // CHECK-NEXT: [[TMP73:%.*]] = call <3 x i32> @llvm.amdgcn.cvt.scalef32.sr.pk16.fp6.f32(<16 x float> [[TMP70]], i32 [[TMP71]], float [[TMP72]])
966
+ // CHECK-NEXT: [[TMP74:%.*]] = load ptr addrspace(1), ptr [[OUT3_ADDR_ASCAST]], align 8
967
+ // CHECK-NEXT: store <3 x i32> [[TMP73]], ptr addrspace(1) [[TMP74]], align 16
932
968
// CHECK-NEXT: ret void
933
969
//
934
970
void test_cvt_scalef32_sr_pk (global uint2 * out2 , bfloat8 srcbf8 , half8 srch8 , float8 srcf8 ,
@@ -944,6 +980,12 @@ void test_cvt_scalef32_sr_pk(global uint2 *out2, bfloat8 srcbf8, half8 srch8, fl
944
980
* out1 = __builtin_amdgcn_cvt_scalef32_sr_pk8_fp4_f32 (srcf8 , sr , scale );
945
981
* out1 = __builtin_amdgcn_cvt_scalef32_sr_pk8_fp4_f16 (srch8 , sr , scale );
946
982
* out1 = __builtin_amdgcn_cvt_scalef32_sr_pk8_fp4_bf16 (srcbf8 , sr , scale );
983
+ * out3 = __builtin_amdgcn_cvt_scalef32_sr_pk16_bf6_bf16 (srcbf16 , sr , scale );
984
+ * out3 = __builtin_amdgcn_cvt_scalef32_sr_pk16_bf6_f16 (srch16 , sr , scale );
985
+ * out3 = __builtin_amdgcn_cvt_scalef32_sr_pk16_fp6_bf16 (srcbf16 , sr , scale );
986
+ * out3 = __builtin_amdgcn_cvt_scalef32_sr_pk16_fp6_f16 (srch16 , sr , scale );
987
+ * out3 = __builtin_amdgcn_cvt_scalef32_sr_pk16_bf6_f32 (srcf16 , sr , scale );
988
+ * out3 = __builtin_amdgcn_cvt_scalef32_sr_pk16_fp6_f32 (srcf16 , sr , scale );
947
989
}
948
990
949
991
// CHECK-LABEL: @test_sat_pk4_i4_i8(
0 commit comments