diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 40 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 15 |
2 files changed, 48 insertions, 7 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 7e3c0ed809c..180d3d5241b 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -784,6 +784,46 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { switch (MI.getOpcode()) { default: return UnableToLegalize; + case TargetOpcode::G_MERGE_VALUES: { + if (TypeIdx != 1) + return UnableToLegalize; + + unsigned DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + if (!DstTy.isScalar()) + return UnableToLegalize; + + unsigned NumSrc = MI.getNumOperands() - 1; + unsigned EltSize = DstTy.getSizeInBits() / NumSrc; + LLT EltTy = LLT::scalar(EltSize); + + unsigned ResultReg = MRI.createGenericVirtualRegister(DstTy); + unsigned Offset = 0; + for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I, + Offset += EltSize) { + assert(MRI.getType(MI.getOperand(I).getReg()) == EltTy); + + unsigned ShiftAmt = MRI.createGenericVirtualRegister(DstTy); + unsigned Shl = MRI.createGenericVirtualRegister(DstTy); + unsigned ZextInput = MRI.createGenericVirtualRegister(DstTy); + MIRBuilder.buildZExt(ZextInput, MI.getOperand(I).getReg()); + + if (Offset != 0) { + unsigned NextResult = I + 1 == E ? DstReg : + MRI.createGenericVirtualRegister(DstTy); + + MIRBuilder.buildConstant(ShiftAmt, Offset); + MIRBuilder.buildShl(Shl, ZextInput, ShiftAmt); + MIRBuilder.buildOr(NextResult, ResultReg, Shl); + ResultReg = NextResult; + } else { + ResultReg = ZextInput; + } + } + + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_UADDO: case TargetOpcode::G_USUBO: { if (TypeIdx == 1) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 1338ffed6e0..43c86aba3ad 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -168,7 +168,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, .legalFor({{S64, S32}, {S32, S16}, {S64, S16}, {S32, S1}, {S64, S1}, {S16, S1}, // FIXME: Hack - {S128, S32}}) + {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}}) .scalarize(0); getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) @@ -390,6 +390,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, }; getActionDefinitionsBuilder(Op) + .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16) + // Clamp the little scalar to s8-s256 and make it a power of 2. It's not + // worth considering the multiples of 64 since 2*192 and 2*384 are not + // valid. + .clampScalar(LitTyIdx, S16, S256) + .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32) + // Break up vectors with weird elements into scalars .fewerElementsIf( [=](const LegalityQuery &Query) { return notValidElt(Query, 0); }, @@ -416,12 +423,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST, } return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits)); }) - .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16) - // Clamp the little scalar to s8-s256 and make it a power of 2. It's not - // worth considering the multiples of 64 since 2*192 and 2*384 are not - // valid. - .clampScalar(LitTyIdx, S16, S256) - .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32) .legalIf([=](const LegalityQuery &Query) { const LLT &BigTy = Query.Types[BigTyIdx]; const LLT &LitTy = Query.Types[LitTyIdx]; |

