diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 78 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 14 |
2 files changed, 83 insertions, 9 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 611bb5dc802..a06d4f565cb 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1284,6 +1284,73 @@ LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, } if (NarrowTy.isVector()) + MIRBuilder.buildConcatVectors(DstReg, DstRegs); + else + MIRBuilder.buildBuildVector(DstReg, DstRegs); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned Src0Reg = MI.getOperand(2).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = MRI.getType(Src0Reg); + + unsigned NumParts; + LLT NarrowTy0, NarrowTy1; + + if (TypeIdx == 0) { + unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; + unsigned OldElts = DstTy.getNumElements(); + + NarrowTy0 = NarrowTy; + NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements(); + NarrowTy1 = NarrowTy.isVector() ? + LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) : + SrcTy.getElementType(); + + } else { + unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; + unsigned OldElts = SrcTy.getNumElements(); + + NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : + NarrowTy.getNumElements(); + NarrowTy0 = LLT::vector(NarrowTy.getNumElements(), + DstTy.getScalarSizeInBits()); + NarrowTy1 = NarrowTy; + } + + // FIXME: Don't know how to handle the situation where the small vectors + // aren't all the same size yet. + if (NarrowTy1.isVector() && + NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements()) + return UnableToLegalize; + + CmpInst::Predicate Pred + = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + + SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs; + extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs); + extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs); + + for (unsigned I = 0; I < NumParts; ++I) { + unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0); + DstRegs.push_back(DstReg); + + if (MI.getOpcode() == TargetOpcode::G_ICMP) + MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]); + else { + MachineInstr *NewCmp + = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]); + NewCmp->setFlags(MI.getFlags()); + } + } + + if (NarrowTy0.isVector()) MIRBuilder.buildConcatVectors(DstReg, DstRegs); else MIRBuilder.buildBuildVector(DstReg, DstRegs); @@ -1295,9 +1362,7 @@ LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { - // FIXME: Don't know how to handle secondary types yet. - if (TypeIdx != 0) - return UnableToLegalize; + using namespace TargetOpcode; MIRBuilder.setInstr(MI); unsigned Opc = MI.getOpcode(); @@ -1384,8 +1449,15 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_ICMP: + case TargetOpcode::G_FCMP: + return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy); case TargetOpcode::G_LOAD: case TargetOpcode::G_STORE: { + // FIXME: Don't know how to handle secondary types yet. + if (TypeIdx != 0) + return UnableToLegalize; + bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD; unsigned ValReg = MI.getOperand(0).getReg(); unsigned AddrReg = MI.getOperand(1).getReg(); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 89b80ae39d0..7c8cc49d40f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -160,10 +160,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, .scalarize(0) .clampScalar(0, S32, S64); - setAction({G_FCMP, S1}, Legal); - setAction({G_FCMP, 1, S32}, Legal); - setAction({G_FCMP, 1, S64}, Legal); - getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT}) .legalFor({{S64, S32}, {S32, S16}, {S64, S16}, {S32, S1}, {S64, S1}, {S16, S1}, @@ -192,8 +188,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, setAction({G_BLOCK_ADDR, CodePtr}, Legal); - setAction({G_ICMP, S1}, Legal); - setAction({G_ICMP, 1, S32}, Legal); + getActionDefinitionsBuilder({G_ICMP, G_FCMP}) + .legalFor({{S1, S32}, {S1, S64}}) + .widenScalarToNextPow2(1) + .clampScalar(1, S32, S64) + .clampMaxNumElements(0, S1, 1) + .clampMaxNumElements(1, S32, 1); + + setAction({G_CTLZ, S32}, Legal); setAction({G_CTLZ_ZERO_UNDEF, S32}, Legal); |