summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp131
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h12
-rw-r--r--llvm/lib/Target/AMDGPU/R600ISelLowering.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp10
4 files changed, 109 insertions, 52 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 8196cfdcc08..030166539c7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -37,7 +37,7 @@ static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
MachineFunction &MF = State.getMachineFunction();
AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
- uint64_t Offset = MFI->allocateKernArg(ValVT.getStoreSize(),
+ uint64_t Offset = MFI->allocateKernArg(LocVT.getStoreSize(),
ArgFlags.getOrigAlign());
State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
@@ -626,9 +626,104 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
// TargetLowering Callbacks
//===---------------------------------------------------------------------===//
-void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
+/// The SelectionDAGBuilder will automatically promote function arguments
+/// with illegal types. However, this does not work for the AMDGPU targets
+/// since the function arguments are stored in memory as these illegal types.
+/// In order to handle this properly we need to get the original type sizes
+/// from the LLVM IR Function and fix up the ISD::InputArg values before
+/// passing them to AnalyzeFormalArguments().
+
+/// When the SelectionDAGBuilder computes the Ins, it takes care of splitting
+/// input values across multiple registers. Each item in the Ins array
+/// represents a single value that will be stored in registers. Ins[x].VT is
+/// the value type of the value that will be stored in the register, so
+/// whatever SDNode we lower the argument to needs to be this type.
+///
+/// In order to correctly lower the arguments we need to know the size of each
+/// argument. Since Ins[x].VT gives us the size of the register that will
+/// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type
+/// for the original function argument so that we can deduce the correct memory
+/// type to use for Ins[x]. In most cases the correct memory type will be
+/// Ins[x].ArgVT. However, this will not always be the case. If, for example,
+/// we have a kernel argument of type v8i8, this argument will be split into
+/// 8 parts and each part will be represented by its own item in the Ins array.
+/// For each part the Ins[x].ArgVT will be the v8i8, which is the full type of
+/// the argument before it was split. From this, we deduce that the memory type
+/// for each individual part is i8. We pass the memory type as LocVT to the
+/// calling convention analysis function and the register type (Ins[x].VT) as
+/// the ValVT.
+void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ const ISD::InputArg &In = Ins[i];
+ EVT MemVT;
+
+ unsigned NumRegs = getNumRegisters(State.getContext(), In.ArgVT);
+
+ if (In.ArgVT == MVT::i16 || In.ArgVT == MVT::i8 || In.ArgVT == MVT::f16) {
+ // The ABI says the caller will extend these values to 32-bits.
+ MemVT = In.ArgVT.isInteger() ? MVT::i32 : MVT::f32;
+ } else if (NumRegs == 1) {
+ // This argument is not split, so the IR type is the memory type.
+ assert(!In.Flags.isSplit());
+ if (In.ArgVT.isExtended()) {
+ // We have an extended type, like i24, so we should just use the register type
+ MemVT = In.VT;
+ } else {
+ MemVT = In.ArgVT;
+ }
+ } else if (In.ArgVT.isVector() && In.VT.isVector() &&
+ In.ArgVT.getScalarType() == In.VT.getScalarType()) {
+ assert(In.ArgVT.getVectorNumElements() > In.VT.getVectorNumElements());
+ // We have a vector value which has been split into a vector with
+ // the same scalar type, but fewer elements. This should handle
+ // all the floating-point vector types.
+ MemVT = In.VT;
+ } else if (In.ArgVT.isVector() &&
+ In.ArgVT.getVectorNumElements() == NumRegs) {
+ // This arg has been split so that each element is stored in a separate
+ // register.
+ MemVT = In.ArgVT.getScalarType();
+ } else if (In.ArgVT.isExtended()) {
+ // We have an extended type, like i65.
+ MemVT = In.VT;
+ } else {
+ unsigned MemoryBits = In.ArgVT.getStoreSizeInBits() / NumRegs;
+ assert(In.ArgVT.getStoreSizeInBits() % NumRegs == 0);
+ if (In.VT.isInteger()) {
+ MemVT = EVT::getIntegerVT(State.getContext(), MemoryBits);
+ } else if (In.VT.isVector()) {
+ assert(!In.VT.getScalarType().isFloatingPoint());
+ unsigned NumElements = In.VT.getVectorNumElements();
+ assert(MemoryBits % NumElements == 0);
+ // This vector type has been split into another vector type with
+ // a different elements size.
+ EVT ScalarVT = EVT::getIntegerVT(State.getContext(),
+ MemoryBits / NumElements);
+ MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements);
+ } else {
+ llvm_unreachable("cannot deduce memory type.");
+ }
+ }
+
+ // Convert one element vectors to scalar.
+ if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)
+ MemVT = MemVT.getScalarType();
+
+ if (MemVT.isExtended()) {
+ // This should really only happen if we have vec3 arguments
+ assert(MemVT.isVector() && MemVT.getVectorNumElements() == 3);
+ MemVT = MemVT.getPow2VectorType(State.getContext());
+ }
+
+ assert(MemVT.isSimple());
+ allocateKernArg(i, In.VT, MemVT.getSimpleVT(), CCValAssign::Full, In.Flags,
+ State);
+ }
+}
+void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const {
State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
}
@@ -2617,38 +2712,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
// Helper functions
//===----------------------------------------------------------------------===//
-void AMDGPUTargetLowering::getOriginalFunctionArgs(
- SelectionDAG &DAG,
- const Function *F,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<ISD::InputArg> &OrigIns) const {
-
- for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
- if (Ins[i].ArgVT == Ins[i].VT) {
- OrigIns.push_back(Ins[i]);
- continue;
- }
-
- EVT VT;
- if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
- // Vector has been split into scalars.
- VT = Ins[i].ArgVT.getVectorElementType();
- } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
- Ins[i].ArgVT.getVectorElementType() !=
- Ins[i].VT.getVectorElementType()) {
- // Vector elements have been promoted
- VT = Ins[i].ArgVT;
- } else {
- // Vector has been spilt into smaller vectors.
- VT = Ins[i].VT;
- }
-
- ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
- Ins[i].OrigArgIndex, Ins[i].PartOffset);
- OrigIns.push_back(Arg);
- }
-}
-
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 382a91e0536..fc042b28180 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -100,16 +100,8 @@ protected:
SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) const;
- /// The SelectionDAGBuilder will automatically promote function arguments
- /// with illegal types. However, this does not work for the AMDGPU targets
- /// since the function arguments are stored in memory as these illegal types.
- /// In order to handle this properly we need to get the origianl types sizes
- /// from the LLVM IR Function and fixup the ISD:InputArg values before
- /// passing them to AnalyzeFormalArguments()
- void getOriginalFunctionArgs(SelectionDAG &DAG,
- const Function *F,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<ISD::InputArg> &OrigIns) const;
+ void analyzeFormalArgumentsCompute(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const;
void AnalyzeFormalArguments(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
void AnalyzeReturn(CCState &State,
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 6b391de00d1..35e6c9d036b 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -1512,9 +1512,11 @@ SDValue R600TargetLowering::LowerFormalArguments(
SmallVector<ISD::InputArg, 8> LocalIns;
- getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
-
- AnalyzeFormalArguments(CCInfo, LocalIns);
+ if (AMDGPU::isShader(CallConv)) {
+ AnalyzeFormalArguments(CCInfo, Ins);
+ } else {
+ analyzeFormalArgumentsCompute(CCInfo, Ins);
+ }
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
CCValAssign &VA = ArgLocs[i];
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 6b94333bd41..92c832f7a46 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -685,9 +685,6 @@ SDValue SITargetLowering::LowerFormalArguments(
}
if (!AMDGPU::isShader(CallConv)) {
- getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
- Splits);
-
assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
} else {
assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() &&
@@ -735,7 +732,10 @@ SDValue SITargetLowering::LowerFormalArguments(
CCInfo.AllocateReg(FlatScratchInitReg);
}
- AnalyzeFormalArguments(CCInfo, Splits);
+ if (!AMDGPU::isShader(CallConv))
+ analyzeFormalArgumentsCompute(CCInfo, Ins);
+ else
+ AnalyzeFormalArguments(CCInfo, Splits);
SmallVector<SDValue, 16> Chains;
@@ -752,7 +752,7 @@ SDValue SITargetLowering::LowerFormalArguments(
if (VA.isMemLoc()) {
VT = Ins[i].VT;
- EVT MemVT = Splits[i].VT;
+ EVT MemVT = VA.getLocVT();
const unsigned Offset = Subtarget->getExplicitKernelArgOffset() +
VA.getLocMemOffset();
// The first 36 bytes of the input buffer contains information about
OpenPOWER on IntegriCloud