summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-01-25 02:36:32 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-01-25 02:36:32 +0000
commit: ca676343a9681534bca98665fa1e496fa89bcc60 (patch)
tree: b97c40b0e284cb73a6b57cb5e7006a625ea8adb3 /llvm/lib
parent: 63bd043a1249224c09eb1d1eb95547b5f9af8c71 (diff)
download: bcm5719-llvm-ca676343a9681534bca98665fa1e496fa89bcc60.tar.gz
download: bcm5719-llvm-ca676343a9681534bca98665fa1e496fa89bcc60.zip
GlobalISel: Implement fewerElementsVector for extensions
llvm-svn: 352155
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 54
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 9
2 files changed, 61 insertions, 2 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3da0d846f19..611bb5dc802 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1244,6 +1244,55 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
}
+/// Split a vector cast into several casts that each work on fewer elements.
+///
+/// Operand 0 of \p MI is the destination and operand 1 the source. The source
+/// is cut into NumParts pieces, one instruction with \p MI's opcode and flags
+/// is built per piece, and the partial results are merged back into the
+/// original destination register, after which \p MI is erased.
+///
+/// \param MI       The cast instruction being legalized.
+/// \param TypeIdx  Type index being narrowed; only index 0 (the result) is
+///                 handled.
+/// \param NarrowTy Type each new cast produces: either a scalar (one cast per
+///                 destination element) or a vector whose element count
+///                 evenly divides the destination's element count.
+/// \returns Legalized on success, or UnableToLegalize for an unsupported
+///          \p TypeIdx or an uneven vector breakdown.
LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
+                                          LLT NarrowTy) {
+  if (TypeIdx != 0)
+    return UnableToLegalize;
+
+  unsigned DstReg = MI.getOperand(0).getReg();
+  unsigned SrcReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT SrcTy = MRI.getType(SrcReg);
+
+  // NarrowTy0 is the type of each partial result, NarrowTy1 the type of the
+  // matching piece of the source.
+  LLT NarrowTy0 = NarrowTy;
+  LLT NarrowTy1;
+  unsigned NumParts;
+
+  if (NarrowTy.isScalar()) {
+    // Fully scalarize: one cast per destination element.
+    NumParts = DstTy.getNumElements();
+    NarrowTy1 = SrcTy.getElementType();
+  } else {
+    // Uneven breakdown not handled.
+    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
+    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
+      return UnableToLegalize;
+
+    // Each source piece must carry as many elements as the NarrowTy result it
+    // feeds. Using NumParts as the element count here is only correct when it
+    // happens to equal NarrowTy.getNumElements().
+    NarrowTy1 = LLT::vector(NarrowTy.getNumElements(),
+                            SrcTy.getElementType().getSizeInBits());
+  }
+
+  SmallVector<unsigned, 4> SrcRegs, DstRegs;
+  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
+
+  for (unsigned I = 0; I < NumParts; ++I) {
+    // Named distinctly so it does not shadow the final destination register.
+    unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
+    MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
+                                .addDef(PartDstReg)
+                                .addUse(SrcRegs[I]);
+
+    // Carry the original instruction's flags over to every piece.
+    NewInst->setFlags(MI.getFlags());
+    DstRegs.push_back(PartDstReg);
+  }
+
+  // Vector pieces are concatenated; scalar pieces are assembled element-wise.
+  if (NarrowTy.isVector())
+    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+  else
+    MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
// FIXME: Don't know how to handle secondary types yet.
@@ -1377,6 +1426,11 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_FPEXT:
+ return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 26dbd25ec9d..89b80ae39d0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -37,6 +37,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
const LLT S16 = LLT::scalar(16);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
+ const LLT S128 = LLT::scalar(128);
const LLT S256 = LLT::scalar(256);
const LLT S512 = LLT::scalar(512);
@@ -148,7 +149,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
getActionDefinitionsBuilder(G_FPEXT)
.legalFor({{S64, S32}, {S32, S16}})
- .lowerFor({{S64, S16}}); // FIXME: Implement
+ .lowerFor({{S64, S16}}) // FIXME: Implement
+ .scalarize(0);
getActionDefinitionsBuilder(G_FSUB)
// Use actual fsub instruction
@@ -164,7 +166,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
.legalFor({{S64, S32}, {S32, S16}, {S64, S16},
- {S32, S1}, {S64, S1}, {S16, S1}});
+ {S32, S1}, {S64, S1}, {S16, S1},
+ // FIXME: Hack
+ {S128, S32}})
+ .scalarize(0);
getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
.legalFor({{S32, S32}, {S64, S32}});
OpenPOWER on IntegriCloud