@@ -871,71 +871,107 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
871
871
872
872
Register Src1 = MI.getOperand (1 ).getReg ();
873
873
LLT SrcTy = MRI.getType (Src1);
874
- int NumMerge = DstTy.getSizeInBits () / WideTy.getSizeInBits ();
874
+ const int DstSize = DstTy.getSizeInBits ();
875
+ const int SrcSize = SrcTy.getSizeInBits ();
876
+ const int WideSize = WideTy.getSizeInBits ();
877
+ const int NumMerge = (DstSize + WideSize - 1 ) / WideSize;
875
878
876
- // Try to turn this into a merge of merges if we can use the requested type as
877
- // the source.
878
- if (NumMerge > 1 ) {
879
- int PartsPerMerge = WideTy.getSizeInBits () / SrcTy.getSizeInBits ();
880
- if (WideTy.getSizeInBits () % SrcTy.getSizeInBits () != 0 )
881
- return UnableToLegalize;
882
-
883
- int RemainderBits = DstTy.getSizeInBits () % WideTy.getSizeInBits ();
884
- int RemainderParts = RemainderBits / SrcTy.getSizeInBits ();
879
+ unsigned NumOps = MI.getNumOperands ();
880
+ unsigned NumSrc = MI.getNumOperands () - 1 ;
881
+ unsigned PartSize = DstTy.getSizeInBits () / NumSrc;
885
882
886
- SmallVector<Register, 4 > Parts;
887
- SmallVector<Register, 4 > SubMerges;
883
+ if (WideSize >= DstSize) {
884
+ // Directly pack the bits in the target type.
885
+ Register ResultReg = MIRBuilder.buildZExt (WideTy, Src1).getReg (0 );
888
886
889
- for (int I = 0 ; I != NumMerge; ++I) {
890
- for (int J = 0 ; J != PartsPerMerge; ++J)
891
- Parts.push_back (MI.getOperand (I * PartsPerMerge + J + 1 ).getReg ());
887
+ for (unsigned I = 2 ; I != NumOps; ++I) {
888
+ const unsigned Offset = (I - 1 ) * PartSize;
892
889
893
- auto SubMerge = MIRBuilder.buildMerge (WideTy, Parts);
894
- SubMerges.push_back (SubMerge.getReg (0 ));
895
- Parts.clear ();
896
- }
890
+ Register SrcReg = MI.getOperand (I).getReg ();
891
+ assert (MRI.getType (SrcReg) == LLT::scalar (PartSize));
897
892
898
- if (RemainderParts == 0 ) {
899
- MIRBuilder.buildMerge (DstReg, SubMerges);
900
- MI.eraseFromParent ();
901
- return Legalized;
902
- }
893
+ auto ZextInput = MIRBuilder.buildZExt (WideTy, SrcReg);
903
894
904
- assert (RemainderParts == 1 );
895
+ Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg :
896
+ MRI.createGenericVirtualRegister (WideTy);
905
897
906
- auto AnyExt = MIRBuilder.buildAnyExt (
907
- WideTy, MI.getOperand (MI.getNumOperands () - 1 ).getReg ());
908
- SubMerges.push_back (AnyExt.getReg (0 ));
898
+ auto ShiftAmt = MIRBuilder.buildConstant (WideTy, Offset);
899
+ auto Shl = MIRBuilder.buildShl (WideTy, ZextInput, ShiftAmt);
900
+ MIRBuilder.buildOr (NextResult, ResultReg, Shl);
901
+ ResultReg = NextResult;
902
+ }
909
903
910
- LLT WiderDstTy = LLT::scalar (SubMerges.size () * WideTy.getSizeInBits ());
911
- auto Merge = MIRBuilder.buildMerge (WiderDstTy, SubMerges);
912
- MIRBuilder.buildTrunc (DstReg, Merge);
904
+ if (WideSize > DstSize)
905
+ MIRBuilder.buildTrunc (DstReg, ResultReg);
913
906
914
907
MI.eraseFromParent ();
915
908
return Legalized;
916
909
}
917
910
918
- unsigned NumOps = MI.getNumOperands ();
919
- unsigned NumSrc = MI.getNumOperands () - 1 ;
920
- unsigned PartSize = DstTy.getSizeInBits () / NumSrc;
921
-
922
- Register ResultReg = MIRBuilder.buildZExt (DstTy, Src1).getReg (0 );
923
-
924
- for (unsigned I = 2 ; I != NumOps; ++I) {
925
- const unsigned Offset = (I - 1 ) * PartSize;
926
-
911
+ // Unmerge the original values to the GCD type, and recombine to the next
912
+ // multiple greater than the original type.
913
+ //
914
+ // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
915
+ // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
916
+ // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
917
+ // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
918
+ // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
919
+ // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
920
+ // %12:_(s12) = G_MERGE_VALUES %10, %11
921
+ //
922
+ // Padding with undef if necessary:
923
+ //
924
+ // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
925
+ // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
926
+ // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
927
+ // %7:_(s2) = G_IMPLICIT_DEF
928
+ // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
929
+ // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
930
+ // %10:_(s12) = G_MERGE_VALUES %8, %9
931
+
932
+ const int GCD = greatestCommonDivisor (SrcSize, WideSize);
933
+ LLT GCDTy = LLT::scalar (GCD);
934
+
935
+ SmallVector<Register, 8 > Parts;
936
+ SmallVector<Register, 8 > NewMergeRegs;
937
+ SmallVector<Register, 8 > Unmerges;
938
+ LLT WideDstTy = LLT::scalar (NumMerge * WideSize);
939
+
940
+ // Decompose the original operands if they don't evenly divide.
941
+ for (int I = 1 , E = MI.getNumOperands (); I != E; ++I) {
927
942
Register SrcReg = MI.getOperand (I).getReg ();
928
- assert (MRI.getType (SrcReg) == LLT::scalar (PartSize));
943
+ if (GCD == SrcSize) {
944
+ Unmerges.push_back (SrcReg);
945
+ } else {
946
+ auto Unmerge = MIRBuilder.buildUnmerge (GCDTy, SrcReg);
947
+ for (int J = 0 , JE = Unmerge->getNumOperands () - 1 ; J != JE; ++J)
948
+ Unmerges.push_back (Unmerge.getReg (J));
949
+ }
950
+ }
929
951
930
- auto ZextInput = MIRBuilder.buildZExt (DstTy, SrcReg);
952
+ // Pad with undef to the next size that is a multiple of the requested size.
953
+ if (static_cast <int >(Unmerges.size ()) != NumMerge * WideSize) {
954
+ Register UndefReg = MIRBuilder.buildUndef (GCDTy).getReg (0 );
955
+ for (int I = Unmerges.size (); I != NumMerge * WideSize; ++I)
956
+ Unmerges.push_back (UndefReg);
957
+ }
931
958
932
- Register NextResult = I + 1 == NumOps ? DstReg :
933
- MRI.createGenericVirtualRegister (DstTy);
959
+ const int PartsPerGCD = WideSize / GCD;
934
960
935
- auto ShiftAmt = MIRBuilder.buildConstant (DstTy, Offset);
936
- auto Shl = MIRBuilder.buildShl (DstTy, ZextInput, ShiftAmt);
937
- MIRBuilder.buildOr (NextResult, ResultReg, Shl);
938
- ResultReg = NextResult;
961
+ // Build merges of each piece.
962
+ ArrayRef<Register> Slicer (Unmerges);
963
+ for (int I = 0 ; I != NumMerge; ++I, Slicer = Slicer.drop_front (PartsPerGCD)) {
964
+ auto Merge = MIRBuilder.buildMerge (WideTy, Slicer.take_front (PartsPerGCD));
965
+ NewMergeRegs.push_back (Merge.getReg (0 ));
966
+ }
967
+
968
+ // A truncate may be necessary if the requested type doesn't evenly divide the
969
+ // original result type.
970
+ if (DstTy.getSizeInBits () == WideDstTy.getSizeInBits ()) {
971
+ MIRBuilder.buildMerge (DstReg, NewMergeRegs);
972
+ } else {
973
+ auto FinalMerge = MIRBuilder.buildMerge (WideDstTy, NewMergeRegs);
974
+ MIRBuilder.buildTrunc (DstReg, FinalMerge.getReg (0 ));
939
975
}
940
976
941
977
MI.eraseFromParent ();
0 commit comments