diff options
Diffstat (limited to 'llvm/lib/CodeGen')
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 132 | ||||
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 3 |
2 files changed, 87 insertions, 48 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index caa49cf2cf7..f5cf7fc9bd9 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -871,71 +871,107 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, Register Src1 = MI.getOperand(1).getReg(); LLT SrcTy = MRI.getType(Src1); - int NumMerge = DstTy.getSizeInBits() / WideTy.getSizeInBits(); + const int DstSize = DstTy.getSizeInBits(); + const int SrcSize = SrcTy.getSizeInBits(); + const int WideSize = WideTy.getSizeInBits(); + const int NumMerge = (DstSize + WideSize - 1) / WideSize; - // Try to turn this into a merge of merges if we can use the requested type as - // the source. - if (NumMerge > 1) { - int PartsPerMerge = WideTy.getSizeInBits() / SrcTy.getSizeInBits(); - if (WideTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) - return UnableToLegalize; - - int RemainderBits = DstTy.getSizeInBits() % WideTy.getSizeInBits(); - int RemainderParts = RemainderBits / SrcTy.getSizeInBits(); + unsigned NumOps = MI.getNumOperands(); + unsigned NumSrc = MI.getNumOperands() - 1; + unsigned PartSize = DstTy.getSizeInBits() / NumSrc; - SmallVector<Register, 4> Parts; - SmallVector<Register, 4> SubMerges; + if (WideSize >= DstSize) { + // Directly pack the bits in the target type. + Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0); - for (int I = 0; I != NumMerge; ++I) { - for (int J = 0; J != PartsPerMerge; ++J) - Parts.push_back(MI.getOperand(I * PartsPerMerge + J + 1).getReg()); + for (unsigned I = 2; I != NumOps; ++I) { + const unsigned Offset = (I - 1) * PartSize; - auto SubMerge = MIRBuilder.buildMerge(WideTy, Parts); - SubMerges.push_back(SubMerge.getReg(0)); - Parts.clear(); - } + Register SrcReg = MI.getOperand(I).getReg(); + assert(MRI.getType(SrcReg) == LLT::scalar(PartSize)); - if (RemainderParts == 0) { - MIRBuilder.buildMerge(DstReg, SubMerges); - MI.eraseFromParent(); - return Legalized; - } + auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg); - assert(RemainderParts == 1); + Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg : + MRI.createGenericVirtualRegister(WideTy); - auto AnyExt = MIRBuilder.buildAnyExt( - WideTy, MI.getOperand(MI.getNumOperands() - 1).getReg()); - SubMerges.push_back(AnyExt.getReg(0)); + auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset); + auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt); + MIRBuilder.buildOr(NextResult, ResultReg, Shl); + ResultReg = NextResult; + } - LLT WiderDstTy = LLT::scalar(SubMerges.size() * WideTy.getSizeInBits()); - auto Merge = MIRBuilder.buildMerge(WiderDstTy, SubMerges); - MIRBuilder.buildTrunc(DstReg, Merge); + if (WideSize > DstSize) + MIRBuilder.buildTrunc(DstReg, ResultReg); MI.eraseFromParent(); return Legalized; } - unsigned NumOps = MI.getNumOperands(); - unsigned NumSrc = MI.getNumOperands() - 1; - unsigned PartSize = DstTy.getSizeInBits() / NumSrc; - - Register ResultReg = MIRBuilder.buildZExt(DstTy, Src1).getReg(0); - - for (unsigned I = 2; I != NumOps; ++I) { - const unsigned Offset = (I - 1) * PartSize; - + // Unmerge the original values to the GCD type, and recombine to the next + // multiple greater than the original type. + // + // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6 + // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0 + // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1 + // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2 + // %10:_(s6) = G_MERGE_VALUES %4, %5, %6 + // %11:_(s6) = G_MERGE_VALUES %7, %8, %9 + // %12:_(s12) = G_MERGE_VALUES %10, %11 + // + // Padding with undef if necessary: + // + // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6 + // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0 + // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1 + // %7:_(s2) = G_IMPLICIT_DEF + // %8:_(s6) = G_MERGE_VALUES %3, %4, %5 + // %9:_(s6) = G_MERGE_VALUES %6, %7, %7 + // %10:_(s12) = G_MERGE_VALUES %8, %9 + + const int GCD = greatestCommonDivisor(SrcSize, WideSize); + LLT GCDTy = LLT::scalar(GCD); + + SmallVector<Register, 8> Parts; + SmallVector<Register, 8> NewMergeRegs; + SmallVector<Register, 8> Unmerges; + LLT WideDstTy = LLT::scalar(NumMerge * WideSize); + + // Decompose the original operands if they don't evenly divide. + for (int I = 1, E = MI.getNumOperands(); I != E; ++I) { Register SrcReg = MI.getOperand(I).getReg(); - assert(MRI.getType(SrcReg) == LLT::scalar(PartSize)); + if (GCD == SrcSize) { + Unmerges.push_back(SrcReg); + } else { + auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); + for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J) + Unmerges.push_back(Unmerge.getReg(J)); + } + } - auto ZextInput = MIRBuilder.buildZExt(DstTy, SrcReg); + // Pad with undef to the next size that is a multiple of the requested size. + if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) { + Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0); + for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I) + Unmerges.push_back(UndefReg); + } - Register NextResult = I + 1 == NumOps ? DstReg : - MRI.createGenericVirtualRegister(DstTy); + const int PartsPerGCD = WideSize / GCD; - auto ShiftAmt = MIRBuilder.buildConstant(DstTy, Offset); - auto Shl = MIRBuilder.buildShl(DstTy, ZextInput, ShiftAmt); - MIRBuilder.buildOr(NextResult, ResultReg, Shl); - ResultReg = NextResult; + // Build merges of each piece. + ArrayRef<Register> Slicer(Unmerges); + for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) { + auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD)); + NewMergeRegs.push_back(Merge.getReg(0)); + } + + // A truncate may be necessary if the requested type doesn't evenly divide the + // original result type. + if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) { + MIRBuilder.buildMerge(DstReg, NewMergeRegs); + } else { + auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs); + MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0)); } MI.eraseFromParent(); diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index 8d94454b092..b7a73326b85 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -568,6 +568,7 @@ MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res, // we need some temporary storage for the DstOp objects. Here we use a // sufficiently large SmallVector to not go through the heap. SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end()); + assert(TmpVec.size() > 1); return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, TmpVec); } @@ -577,6 +578,7 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res, // we need some temporary storage for the DstOp objects. Here we use a // sufficiently large SmallVector to not go through the heap. SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end()); + assert(TmpVec.size() > 1); return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op); } @@ -595,6 +597,7 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<Register> Res, // we need some temporary storage for the DstOp objects. Here we use a // sufficiently large SmallVector to not go through the heap. SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end()); + assert(TmpVec.size() > 1); return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op); } |