summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp40
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp15
2 files changed, 48 insertions, 7 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 7e3c0ed809c..180d3d5241b 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -784,6 +784,46 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_MERGE_VALUES: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (!DstTy.isScalar())
+ return UnableToLegalize;
+
+ unsigned NumSrc = MI.getNumOperands() - 1;
+ unsigned EltSize = DstTy.getSizeInBits() / NumSrc;
+ LLT EltTy = LLT::scalar(EltSize);
+
+ unsigned ResultReg = MRI.createGenericVirtualRegister(DstTy);
+ unsigned Offset = 0;
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I,
+ Offset += EltSize) {
+ assert(MRI.getType(MI.getOperand(I).getReg()) == EltTy);
+
+ unsigned ShiftAmt = MRI.createGenericVirtualRegister(DstTy);
+ unsigned Shl = MRI.createGenericVirtualRegister(DstTy);
+ unsigned ZextInput = MRI.createGenericVirtualRegister(DstTy);
+ MIRBuilder.buildZExt(ZextInput, MI.getOperand(I).getReg());
+
+ if (Offset != 0) {
+ unsigned NextResult = I + 1 == E ? DstReg :
+ MRI.createGenericVirtualRegister(DstTy);
+
+ MIRBuilder.buildConstant(ShiftAmt, Offset);
+ MIRBuilder.buildShl(Shl, ZextInput, ShiftAmt);
+ MIRBuilder.buildOr(NextResult, ResultReg, Shl);
+ ResultReg = NextResult;
+ } else {
+ ResultReg = ZextInput;
+ }
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_UADDO:
case TargetOpcode::G_USUBO: {
if (TypeIdx == 1)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 1338ffed6e0..43c86aba3ad 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -168,7 +168,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
.legalFor({{S64, S32}, {S32, S16}, {S64, S16},
{S32, S1}, {S64, S1}, {S16, S1},
// FIXME: Hack
- {S128, S32}})
+ {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}})
.scalarize(0);
getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
@@ -390,6 +390,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
};
getActionDefinitionsBuilder(Op)
+ .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
+ // Clamp the little scalar to s16-s256 and make it a power of 2. It's not
+ // worth considering the multiples of 64 since 2*192 and 2*384 are not
+ // valid.
+ .clampScalar(LitTyIdx, S16, S256)
+ .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
+
// Break up vectors with weird elements into scalars
.fewerElementsIf(
[=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
@@ -416,12 +423,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
}
return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
})
- .widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
- // Clamp the little scalar to s8-s256 and make it a power of 2. It's not
- // worth considering the multiples of 64 since 2*192 and 2*384 are not
- // valid.
- .clampScalar(LitTyIdx, S16, S256)
- .widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
.legalIf([=](const LegalityQuery &Query) {
const LLT &BigTy = Query.Types[BigTyIdx];
const LLT &LitTy = Query.Types[LitTyIdx];
OpenPOWER on IntegriCloud