diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-01-30 04:19:31 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-01-30 04:19:31 +0000 |
| commit | dc6c78596b76856d98830060359467483e58d36e (patch) | |
| tree | c24011d3e40a0c942f36151def609ce286e97b7a /llvm/lib | |
| parent | a8710a6e7ea8bca33f25b0af796c34cf88bde066 (diff) | |
| download | bcm5719-llvm-dc6c78596b76856d98830060359467483e58d36e.tar.gz bcm5719-llvm-dc6c78596b76856d98830060359467483e58d36e.zip | |
GlobalISel: Implement fewerElementsVector for select
llvm-svn: 352601
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 74 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 21 |
2 files changed, 94 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 550cd05de5b..5fb5aae1c6d 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1679,6 +1679,78 @@ LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned CondReg = MI.getOperand(1).getReg(); + + unsigned NumParts = 0; + LLT NarrowTy0, NarrowTy1; + + LLT DstTy = MRI.getType(DstReg); + LLT CondTy = MRI.getType(CondReg); + unsigned Size = DstTy.getSizeInBits(); + + assert(TypeIdx == 0 || CondTy.isVector()); + + if (TypeIdx == 0) { + NarrowTy0 = NarrowTy; + NarrowTy1 = CondTy; + + unsigned NarrowSize = NarrowTy0.getSizeInBits(); + // FIXME: Don't know how to handle the situation where the small vectors + // aren't all the same size yet. + if (Size % NarrowSize != 0) + return UnableToLegalize; + + NumParts = Size / NarrowSize; + + // Need to break down the condition type + if (CondTy.isVector()) { + if (CondTy.getNumElements() == NumParts) + NarrowTy1 = CondTy.getElementType(); + else + NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts, + CondTy.getScalarSizeInBits()); + } + } else { + NumParts = CondTy.getNumElements(); + if (NarrowTy.isVector()) { + // TODO: Handle uneven breakdown. + if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements()) + return UnableToLegalize; + + return UnableToLegalize; + } else { + NarrowTy0 = DstTy.getElementType(); + NarrowTy1 = NarrowTy; + } + } + + SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs; + if (CondTy.isVector()) + extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs); + + extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs); + extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs); + + for (unsigned i = 0; i < NumParts; ++i) { + unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0); + MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg, + Src1Regs[i], Src2Regs[i]); + DstRegs.push_back(DstReg); + } + + if (NarrowTy0.isVector()) + MIRBuilder.buildConcatVectors(DstReg, DstRegs); + else + MIRBuilder.buildBuildVector(DstReg, DstRegs); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorLoadStore(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { // FIXME: Don't know how to handle secondary types yet. @@ -1784,6 +1856,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_ICMP: case G_FCMP: return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy); + case G_SELECT: + return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy); case G_LOAD: case G_STORE: return fewerElementsVectorLoadStore(MI, TypeIdx, NarrowTy); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 0da46072702..d665ffbb146 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -314,7 +314,26 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, getActionDefinitionsBuilder(G_SELECT) .legalFor({{S32, S1}, {S64, S1}, {V2S32, S1}, {V2S16, S1}}) .clampScalar(0, S32, S64) - .scalarize(0); + .fewerElementsIf( + [=](const LegalityQuery &Query) { + if (Query.Types[1].isVector()) + return true; + + LLT Ty = Query.Types[0]; + + // FIXME: Hack until odd splits handled + return Ty.isVector() && + (Ty.getScalarSizeInBits() > 32 || Ty.getNumElements() % 2 != 0); + }, + scalarize(0)) + // FIXME: Handle 16-bit vectors better + .fewerElementsIf( + [=](const LegalityQuery &Query) { + return Query.Types[0].isVector() && + Query.Types[0].getElementType().getSizeInBits() < 32;}, + scalarize(0)) + .scalarize(1) + .clampMaxNumElements(0, S32, 2); // TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can // be more flexible with the shift amount type. |

