diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 61 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 11 |
2 files changed, 71 insertions, 1 deletion
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 684b99d8bae..c5830ff8652 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2767,6 +2767,65 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
 }
 
 LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
+                                                unsigned TypeIdx,
+                                                LLT NarrowTy) {
+  // Lower to NarrowTy-sized G_BUILD_VECTORs joined by a G_CONCAT_VECTORS. If
+  // the final piece is short it is padded with undef, and the widened concat
+  // is narrowed back to the original type with a G_EXTRACT at offset 0.
+  //
+  //   %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
+  //     -> <2 x s16>
+  //
+  //   %4:_(s16) = G_IMPLICIT_DEF
+  //   %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
+  //   %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
+  //   %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
+  //   %3:_(<3 x s16>) = G_EXTRACT %7, 0
+  assert(TypeIdx == 0 && "not a vector type index");
+
+  const Register Dst = MI.getOperand(0).getReg();
+  const LLT DstTy = MRI.getType(Dst);
+  const LLT EltTy = DstTy.getElementType();
+
+  const int NumDstElts = DstTy.getNumElements();
+  const int EltsPerPiece = NarrowTy.getNumElements();
+  // Round up so a trailing partial piece still gets its own build_vector.
+  const int NumPieces = (NumDstElts + EltsPerPiece - 1) / EltsPerPiece;
+  const LLT PaddedTy = LLT::vector(EltsPerPiece * NumPieces, EltTy);
+  const bool NeedsPadding = PaddedTy != DstTy;
+
+  // The undef filler is created up front, and only when it will be used.
+  Register Filler;
+  if (NeedsPadding)
+    Filler = MIRBuilder.buildUndef(EltTy).getReg(0);
+
+  SmallVector<Register, 8> Pieces;
+  for (int Piece = 0; Piece != NumPieces; ++Piece) {
+    SmallVector<Register, 8> Elts;
+    const int First = EltsPerPiece * Piece;
+    for (int Idx = First; Idx != First + EltsPerPiece; ++Idx) {
+      // Operand 0 is the def, so source element Idx is operand Idx + 1.
+      Register Elt = Filler;
+      if (Idx < NumDstElts)
+        Elt = MI.getOperand(Idx + 1).getReg();
+      Elts.push_back(Elt);
+    }
+    Pieces.push_back(MIRBuilder.buildBuildVector(NarrowTy, Elts).getReg(0));
+  }
+
+  if (NeedsPadding) {
+    auto Padded = MIRBuilder.buildConcatVectors(PaddedTy, Pieces);
+    MIRBuilder.buildExtract(Dst, Padded, 0);
+  } else {
+    MIRBuilder.buildConcatVectors(Dst, Pieces);
+  }
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                       LLT NarrowTy) {
   // FIXME: Don't know how to handle secondary types yet.
@@ -2941,6 +3000,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
     return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
   case G_UNMERGE_VALUES:
     return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
+  case G_BUILD_VECTOR:
+    return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
   case G_LOAD:
   case G_STORE:
     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index c1c111a762d..7fe0298f1c3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -64,6 +64,14 @@ static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
   };
 }
 
+// Matches vectors of 16-bit elements that have more than two elements.
+static LegalityPredicate isWideVec16(unsigned TypeIdx) {
+  return [TypeIdx](const LegalityQuery &Query) {
+    const LLT Ty = Query.Types[TypeIdx];
+    return Ty.getScalarType().getSizeInBits() == 16 && Ty.getNumElements() > 2;
+  };
+}
+
 static LegalizeMutation oneMoreElement(unsigned TypeIdx) {
   return [=](const LegalityQuery &Query) {
     const LLT Ty = Query.Types[TypeIdx];
@@ -945,7 +953,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
     .legalForCartesianProduct(AllS32Vectors, {S32})
     .legalForCartesianProduct(AllS64Vectors, {S64})
     .clampNumElements(0, V16S32, V32S32)
-    .clampNumElements(0, V2S64, V16S64);
+    .clampNumElements(0, V2S64, V16S64)
+    .fewerElementsIf(isWideVec16(0), changeTo(0, V2S16));
 
   if (ST.hasScalarPackInsts())
     BuildVector.legalFor({V2S16, S32});

