summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp132
-rw-r--r--llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp3
2 files changed, 87 insertions, 48 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index caa49cf2cf7..f5cf7fc9bd9 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -871,71 +871,107 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
Register Src1 = MI.getOperand(1).getReg();
LLT SrcTy = MRI.getType(Src1);
- int NumMerge = DstTy.getSizeInBits() / WideTy.getSizeInBits();
+ const int DstSize = DstTy.getSizeInBits();
+ const int SrcSize = SrcTy.getSizeInBits();
+ const int WideSize = WideTy.getSizeInBits();
+ const int NumMerge = (DstSize + WideSize - 1) / WideSize;
- // Try to turn this into a merge of merges if we can use the requested type as
- // the source.
- if (NumMerge > 1) {
- int PartsPerMerge = WideTy.getSizeInBits() / SrcTy.getSizeInBits();
- if (WideTy.getSizeInBits() % SrcTy.getSizeInBits() != 0)
- return UnableToLegalize;
-
- int RemainderBits = DstTy.getSizeInBits() % WideTy.getSizeInBits();
- int RemainderParts = RemainderBits / SrcTy.getSizeInBits();
+ unsigned NumOps = MI.getNumOperands();
+ unsigned NumSrc = MI.getNumOperands() - 1;
+ unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
- SmallVector<Register, 4> Parts;
- SmallVector<Register, 4> SubMerges;
+ if (WideSize >= DstSize) {
+ // Directly pack the bits in the target type.
+ Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
- for (int I = 0; I != NumMerge; ++I) {
- for (int J = 0; J != PartsPerMerge; ++J)
- Parts.push_back(MI.getOperand(I * PartsPerMerge + J + 1).getReg());
+ for (unsigned I = 2; I != NumOps; ++I) {
+ const unsigned Offset = (I - 1) * PartSize;
- auto SubMerge = MIRBuilder.buildMerge(WideTy, Parts);
- SubMerges.push_back(SubMerge.getReg(0));
- Parts.clear();
- }
+ Register SrcReg = MI.getOperand(I).getReg();
+ assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
- if (RemainderParts == 0) {
- MIRBuilder.buildMerge(DstReg, SubMerges);
- MI.eraseFromParent();
- return Legalized;
- }
+ auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
- assert(RemainderParts == 1);
+ Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg :
+ MRI.createGenericVirtualRegister(WideTy);
- auto AnyExt = MIRBuilder.buildAnyExt(
- WideTy, MI.getOperand(MI.getNumOperands() - 1).getReg());
- SubMerges.push_back(AnyExt.getReg(0));
+ auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
+ auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
+ MIRBuilder.buildOr(NextResult, ResultReg, Shl);
+ ResultReg = NextResult;
+ }
- LLT WiderDstTy = LLT::scalar(SubMerges.size() * WideTy.getSizeInBits());
- auto Merge = MIRBuilder.buildMerge(WiderDstTy, SubMerges);
- MIRBuilder.buildTrunc(DstReg, Merge);
+ if (WideSize > DstSize)
+ MIRBuilder.buildTrunc(DstReg, ResultReg);
MI.eraseFromParent();
return Legalized;
}
- unsigned NumOps = MI.getNumOperands();
- unsigned NumSrc = MI.getNumOperands() - 1;
- unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
-
- Register ResultReg = MIRBuilder.buildZExt(DstTy, Src1).getReg(0);
-
- for (unsigned I = 2; I != NumOps; ++I) {
- const unsigned Offset = (I - 1) * PartSize;
-
+ // Unmerge the original values to the GCD type, and recombine to the next
+ // multiple greater than the original type.
+ //
+ // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
+ // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
+ // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
+ // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
+ // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
+ // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
+ // %12:_(s12) = G_MERGE_VALUES %10, %11
+ //
+ // Padding with undef if necessary:
+ //
+ // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
+ // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
+ // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
+ // %7:_(s2) = G_IMPLICIT_DEF
+ // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
+ // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
+ // %10:_(s12) = G_MERGE_VALUES %8, %9
+
+ const int GCD = greatestCommonDivisor(SrcSize, WideSize);
+ LLT GCDTy = LLT::scalar(GCD);
+
+ SmallVector<Register, 8> Parts;
+ SmallVector<Register, 8> NewMergeRegs;
+ SmallVector<Register, 8> Unmerges;
+ LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
+
+ // Decompose the original operands if they don't evenly divide.
+ for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
Register SrcReg = MI.getOperand(I).getReg();
- assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
+ if (GCD == SrcSize) {
+ Unmerges.push_back(SrcReg);
+ } else {
+ auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
+ for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
+ Unmerges.push_back(Unmerge.getReg(J));
+ }
+ }
- auto ZextInput = MIRBuilder.buildZExt(DstTy, SrcReg);
+ // Pad with undef to the next size that is a multiple of the requested size.
+ if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
+ Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
+ for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
+ Unmerges.push_back(UndefReg);
+ }
- Register NextResult = I + 1 == NumOps ? DstReg :
- MRI.createGenericVirtualRegister(DstTy);
+ const int PartsPerGCD = WideSize / GCD;
- auto ShiftAmt = MIRBuilder.buildConstant(DstTy, Offset);
- auto Shl = MIRBuilder.buildShl(DstTy, ZextInput, ShiftAmt);
- MIRBuilder.buildOr(NextResult, ResultReg, Shl);
- ResultReg = NextResult;
+ // Build merges of each piece.
+ ArrayRef<Register> Slicer(Unmerges);
+ for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
+ auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
+ NewMergeRegs.push_back(Merge.getReg(0));
+ }
+
+ // A truncate may be necessary if the requested type doesn't evenly divide the
+ // original result type.
+ if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
+ MIRBuilder.buildMerge(DstReg, NewMergeRegs);
+ } else {
+ auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
+ MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
}
MI.eraseFromParent();
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 8d94454b092..b7a73326b85 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -568,6 +568,7 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ assert(TmpVec.size() > 1);
return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, TmpVec);
}
@@ -577,6 +578,7 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+ assert(TmpVec.size() > 1);
return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
}
@@ -595,6 +597,7 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<Register> Res,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+ assert(TmpVec.size() > 1);
return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
}
OpenPOWER on IntegriCloud