Skip to content

Commit 0966dd0

Browse files
committed
GlobalISel: Handle widenScalar of arbitrary G_MERGE_VALUES sources
Extract the sources into pieces whose size is the GCD of the original source size and the requested wide size, padding with G_IMPLICIT_DEF as necessary. Also fix the case where the requested source type is wider than the original result type: that case was ignoring the requested type and doing the operation in the destination type. Now the operation is done in the requested type and the result is truncated back. llvm-svn: 366367
1 parent 914a59c commit 0966dd0

File tree

5 files changed

+490
-186
lines changed

5 files changed

+490
-186
lines changed

llvm/include/llvm/Support/MathExtras.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -559,15 +559,20 @@ inline unsigned Log2_64_Ceil(uint64_t Value) {
559559
}
560560

561561
/// Return the greatest common divisor of A and B using Euclid's algorithm.
///
/// The loop repeatedly replaces (A, B) with (B, A % B) until B is zero and
/// then returns A, so when B is zero on entry A is returned unchanged.
/// T is the type used for both operands, the temporary, and the result.
562-
inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
562+
template <typename T>
563+
inline T greatestCommonDivisor(T A, T B) {
563564
while (B) {
564-
uint64_t T = B;
565+
T Tmp = B;
565566
B = A % B;
566-
A = T;
567+
A = Tmp;
567568
}
568569
return A;
569570
}
570571

572+
/// Return the greatest common divisor of two 64-bit unsigned values by
/// forwarding to greatestCommonDivisor instantiated with uint64_t.
inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
573+
return greatestCommonDivisor<uint64_t>(A, B);
574+
}
575+
571576
/// This function takes a 64-bit integer and returns the bit equivalent double.
572577
inline double BitsToDouble(uint64_t Bits) {
573578
double D;

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 84 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -871,71 +871,107 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
871871

872872
Register Src1 = MI.getOperand(1).getReg();
873873
LLT SrcTy = MRI.getType(Src1);
874-
int NumMerge = DstTy.getSizeInBits() / WideTy.getSizeInBits();
874+
const int DstSize = DstTy.getSizeInBits();
875+
const int SrcSize = SrcTy.getSizeInBits();
876+
const int WideSize = WideTy.getSizeInBits();
877+
const int NumMerge = (DstSize + WideSize - 1) / WideSize;
875878

876-
// Try to turn this into a merge of merges if we can use the requested type as
877-
// the source.
878-
if (NumMerge > 1) {
879-
int PartsPerMerge = WideTy.getSizeInBits() / SrcTy.getSizeInBits();
880-
if (WideTy.getSizeInBits() % SrcTy.getSizeInBits() != 0)
881-
return UnableToLegalize;
882-
883-
int RemainderBits = DstTy.getSizeInBits() % WideTy.getSizeInBits();
884-
int RemainderParts = RemainderBits / SrcTy.getSizeInBits();
879+
unsigned NumOps = MI.getNumOperands();
880+
unsigned NumSrc = MI.getNumOperands() - 1;
881+
unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
885882

886-
SmallVector<Register, 4> Parts;
887-
SmallVector<Register, 4> SubMerges;
883+
if (WideSize >= DstSize) {
884+
// Directly pack the bits in the target type.
885+
Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
888886

889-
for (int I = 0; I != NumMerge; ++I) {
890-
for (int J = 0; J != PartsPerMerge; ++J)
891-
Parts.push_back(MI.getOperand(I * PartsPerMerge + J + 1).getReg());
887+
for (unsigned I = 2; I != NumOps; ++I) {
888+
const unsigned Offset = (I - 1) * PartSize;
892889

893-
auto SubMerge = MIRBuilder.buildMerge(WideTy, Parts);
894-
SubMerges.push_back(SubMerge.getReg(0));
895-
Parts.clear();
896-
}
890+
Register SrcReg = MI.getOperand(I).getReg();
891+
assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
897892

898-
if (RemainderParts == 0) {
899-
MIRBuilder.buildMerge(DstReg, SubMerges);
900-
MI.eraseFromParent();
901-
return Legalized;
902-
}
893+
auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
903894

904-
assert(RemainderParts == 1);
895+
Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg :
896+
MRI.createGenericVirtualRegister(WideTy);
905897

906-
auto AnyExt = MIRBuilder.buildAnyExt(
907-
WideTy, MI.getOperand(MI.getNumOperands() - 1).getReg());
908-
SubMerges.push_back(AnyExt.getReg(0));
898+
auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
899+
auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
900+
MIRBuilder.buildOr(NextResult, ResultReg, Shl);
901+
ResultReg = NextResult;
902+
}
909903

910-
LLT WiderDstTy = LLT::scalar(SubMerges.size() * WideTy.getSizeInBits());
911-
auto Merge = MIRBuilder.buildMerge(WiderDstTy, SubMerges);
912-
MIRBuilder.buildTrunc(DstReg, Merge);
904+
if (WideSize > DstSize)
905+
MIRBuilder.buildTrunc(DstReg, ResultReg);
913906

914907
MI.eraseFromParent();
915908
return Legalized;
916909
}
917910

918-
unsigned NumOps = MI.getNumOperands();
919-
unsigned NumSrc = MI.getNumOperands() - 1;
920-
unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
921-
922-
Register ResultReg = MIRBuilder.buildZExt(DstTy, Src1).getReg(0);
923-
924-
for (unsigned I = 2; I != NumOps; ++I) {
925-
const unsigned Offset = (I - 1) * PartSize;
926-
911+
// Unmerge the original values to the GCD type, and recombine to the next
912+
// multiple of the requested type that covers the original type.
913+
//
914+
// %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
915+
// %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
916+
// %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
917+
// %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
918+
// %10:_(s6) = G_MERGE_VALUES %4, %5, %6
919+
// %11:_(s6) = G_MERGE_VALUES %7, %8, %9
920+
// %12:_(s12) = G_MERGE_VALUES %10, %11
921+
//
922+
// Padding with undef if necessary:
923+
//
924+
// %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
925+
// %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
926+
// %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
927+
// %7:_(s2) = G_IMPLICIT_DEF
928+
// %8:_(s6) = G_MERGE_VALUES %3, %4, %5
929+
// %9:_(s6) = G_MERGE_VALUES %6, %7, %7
930+
// %10:_(s12) = G_MERGE_VALUES %8, %9
// %2:_(s8) = G_TRUNC %10
931+
932+
const int GCD = greatestCommonDivisor(SrcSize, WideSize);
933+
LLT GCDTy = LLT::scalar(GCD);
934+
935+
SmallVector<Register, 8> Parts;
936+
SmallVector<Register, 8> NewMergeRegs;
937+
SmallVector<Register, 8> Unmerges;
938+
LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
939+
940+
// Decompose each original operand into GCD-sized pieces, unless it is
// already that size.
941+
for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
927942
Register SrcReg = MI.getOperand(I).getReg();
928-
assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
943+
if (GCD == SrcSize) {
944+
Unmerges.push_back(SrcReg);
945+
} else {
946+
auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
947+
for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
948+
Unmerges.push_back(Unmerge.getReg(J));
949+
}
950+
}
929951

930-
auto ZextInput = MIRBuilder.buildZExt(DstTy, SrcReg);
952+
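// Pad with undef to the next size that is a multiple of the requested size.
// NOTE(review): the bound used below is NumMerge * WideSize (a bit count),
// while Unmerges holds GCD-sized pieces and the merge loop only consumes
// NumMerge * (WideSize / GCD) of them; this looks like it over-pads whenever
// GCD > 1 -- confirm whether the bound should be a piece count.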
953+
if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
954+
Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
955+
for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
956+
Unmerges.push_back(UndefReg);
957+
}
931958

932-
Register NextResult = I + 1 == NumOps ? DstReg :
933-
MRI.createGenericVirtualRegister(DstTy);
959+
const int PartsPerGCD = WideSize / GCD;
934960

935-
auto ShiftAmt = MIRBuilder.buildConstant(DstTy, Offset);
936-
auto Shl = MIRBuilder.buildShl(DstTy, ZextInput, ShiftAmt);
937-
MIRBuilder.buildOr(NextResult, ResultReg, Shl);
938-
ResultReg = NextResult;
961+
// Build merges of each piece.
962+
ArrayRef<Register> Slicer(Unmerges);
963+
for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
964+
auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
965+
NewMergeRegs.push_back(Merge.getReg(0));
966+
}
967+
968+
// A truncate may be necessary if the requested type doesn't evenly divide the
969+
// original result type.
970+
if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
971+
MIRBuilder.buildMerge(DstReg, NewMergeRegs);
972+
} else {
973+
auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
974+
MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
939975
}
940976

941977
MI.eraseFromParent();

llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,7 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res,
568568
// we need some temporary storage for the SrcOp objects. Here we use a
569569
// sufficiently large SmallVector to not go through the heap.
570570
SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
571+
assert(TmpVec.size() > 1);
571572
return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, TmpVec);
572573
}
573574

@@ -577,6 +578,7 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
577578
// we need some temporary storage for the DstOp objects. Here we use a
578579
// sufficiently large SmallVector to not go through the heap.
579580
SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
581+
assert(TmpVec.size() > 1);
580582
return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
581583
}
582584

@@ -595,6 +597,7 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<Register> Res,
595597
// we need some temporary storage for the DstOp objects. Here we use a
596598
// sufficiently large SmallVector to not go through the heap.
597599
SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
600+
assert(TmpVec.size() > 1);
598601
return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
599602
}
600603

0 commit comments

Comments
 (0)