summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 44
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 48
2 files changed, 65 insertions, 27 deletions
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d2c507d4e18..ed56ca9e081 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1206,7 +1206,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
return UnableToLegalize;
MIRBuilder.setInstr(MI);
- switch (MI.getOpcode()) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
default:
return UnableToLegalize;
case TargetOpcode::G_IMPLICIT_DEF: {
@@ -1235,9 +1236,18 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
MI.eraseFromParent();
return Legalized;
}
- case TargetOpcode::G_ADD: {
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FREM:
+ case TargetOpcode::G_FMA: {
unsigned NarrowSize = NarrowTy.getSizeInBits();
unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned Flags = MI.getFlags();
unsigned Size = MRI.getType(DstReg).getSizeInBits();
int NumParts = Size / NarrowSize;
// FIXME: Don't know how to handle the situation where the small vectors
@@ -1245,17 +1255,37 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
if (Size % NarrowSize != 0)
return UnableToLegalize;
- SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
- extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
+ unsigned NumOps = MI.getNumOperands() - 1;
+ SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
+
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
+
+ if (NumOps >= 2)
+ extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
+
+ if (NumOps >= 3)
+ extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
for (int i = 0; i < NumParts; ++i) {
unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
+
+ if (NumOps == 1)
+ MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
+ else if (NumOps == 2) {
+ MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
+ } else if (NumOps == 3) {
+ MIRBuilder.buildInstr(Opc, {DstReg},
+ {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
+ }
+
DstRegs.push_back(DstReg);
}
- MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+ if (NarrowTy.isVector())
+ MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+ else
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
MI.eraseFromParent();
return Legalized;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 81fd43e73a4..a57da6493ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -28,6 +28,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
const GCNTargetMachine &TM) {
using namespace TargetOpcode;
+ auto scalarize = [=](const LegalityQuery &Query, unsigned TypeIdx) {
+ const LLT &Ty = Query.Types[TypeIdx];
+ return std::make_pair(TypeIdx, Ty.getElementType());
+ };
+
auto GetAddrSpacePtr = [&TM](unsigned AS) {
return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
};
@@ -136,10 +141,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
- getActionDefinitionsBuilder(
- { G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA})
- .legalFor({S32, S64})
- .clampScalar(0, S32, S64);
+ getActionDefinitionsBuilder({G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA})
+ .legalFor({S32, S64})
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
+ [=](const LegalityQuery &Query) { return scalarize(Query, 0); })
+ .clampScalar(0, S32, S64);
getActionDefinitionsBuilder(G_FPTRUNC)
.legalFor({{S32, S64}, {S16, S32}});
@@ -149,11 +156,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
.lowerFor({{S64, S16}}); // FIXME: Implement
getActionDefinitionsBuilder(G_FSUB)
- // Use actual fsub instruction
- .legalFor({S32})
- // Must use fadd + fneg
- .lowerFor({S64, S16})
- .clampScalar(0, S32, S64);
+ // Use actual fsub instruction
+ .legalFor({S32})
+ // Must use fadd + fneg
+ .lowerFor({S64, S16, V2S16})
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) { return Query.Types[0].isVector(); },
+ [=](const LegalityQuery &Query) { return scalarize(Query, 0); })
+ .clampScalar(0, S32, S64);
setAction({G_FCMP, S1}, Legal);
setAction({G_FCMP, 1, S32}, Legal);
@@ -295,11 +305,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
});
getActionDefinitionsBuilder(G_BUILD_VECTOR)
- .legalForCartesianProduct(AllS32Vectors, {S32})
- .legalForCartesianProduct(AllS64Vectors, {S64})
- .clampNumElements(0, V16S32, V16S32)
- .clampNumElements(0, V2S64, V8S64)
- .minScalarSameAs(1, 0);
+ .legalForCartesianProduct(AllS32Vectors, {S32})
+ .legalForCartesianProduct(AllS64Vectors, {S64})
+ .clampNumElements(0, V16S32, V16S32)
+ .clampNumElements(0, V2S64, V8S64)
+ .minScalarSameAs(1, 0)
+ // FIXME: Sort of a hack to make progress on other legalizations.
+ .legalIf([=](const LegalityQuery &Query) {
+ return Query.Types[0].getScalarSizeInBits() < 32;
+ });
// TODO: Support any combination of v2s32
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
@@ -328,12 +342,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
return false;
};
- auto scalarize =
- [=](const LegalityQuery &Query, unsigned TypeIdx) {
- const LLT &Ty = Query.Types[TypeIdx];
- return std::make_pair(TypeIdx, Ty.getElementType());
- };
-
getActionDefinitionsBuilder(Op)
// Break up vectors with weird elements into scalars
.fewerElementsIf(
OpenPOWER on IntegriCloud