Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td     7
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp  74
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp      29
3 files changed, 18 insertions, 92 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 3688cd77542..be133b19c26 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -35,11 +35,6 @@ def CC_SI : CallingConv<[
SGPR104, SGPR105
]>>>,
- // We have no way of referring to the generated register tuples
- // here, so use a custom function.
- CCIfInReg<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,
- CCIfByVal<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,
-
// 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
CCIfNotInReg<CCIfType<[f32, i32, f16, v2i16, v2f16] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
@@ -138,7 +133,6 @@ def CC_AMDGPU_Func : CallingConv<[
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
- CCIfType<[i64, f64, v2i32, v2f32, v3i32, v3f32, v4i32, v4f32, v5i32, v5f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>,
CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,
CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,
CCIfType<[v3i32, v3f32], CCAssignToStack<12, 4>>,
@@ -157,7 +151,6 @@ def RetCC_AMDGPU_Func : CallingConv<[
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
- CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>
]>;
def CC_AMDGPU : CallingConv<[
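With the custom allocateSGPRTuple/allocateVGPRTuple entries removed, CC_SI and CC_AMDGPU_Func no longer see 64-bit or wider values at all: the SIISelLowering.cpp change further down pre-splits such values into i32 pieces, and each piece is then covered by the ordinary 32-bit CCAssignToReg/CCAssignToStack rules that remain in this file. A minimal standalone C++ sketch of that flow, using toy register names and a toy allocator rather than the LLVM CCState API:

#include <cstdint>
#include <cstdio>
#include <string>

// Toy stand-in for CCAssignToReg<[SGPR0, ..., SGPR105]>: hand out the next
// free 32-bit register, falling back to the stack when the list is exhausted.
static std::string assignNextReg(unsigned &NextFree, unsigned Limit) {
  if (NextFree >= Limit)
    return "<stack>";
  return "SGPR" + std::to_string(NextFree++);
}

int main() {
  unsigned NextFree = 0;
  // An inreg i64 argument reaches the calling-convention table as two i32
  // pieces, so no tuple-aware custom rule is needed any more.
  uint64_t Val = 0x1122334455667788ULL;
  uint32_t Pieces[2] = {static_cast<uint32_t>(Val),        // low half
                        static_cast<uint32_t>(Val >> 32)}; // high half
  for (uint32_t P : Pieces)
    std::printf("piece 0x%08x -> %s\n", P, assignNextReg(NextFree, 106).c_str());
  return 0;
}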
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 39016ed3719..73aff33f2e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -39,80 +39,6 @@
#include "llvm/Support/KnownBits.h"
using namespace llvm;
-static bool allocateCCRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State,
- const TargetRegisterClass *RC,
- unsigned NumRegs) {
- ArrayRef<MCPhysReg> RegList = makeArrayRef(RC->begin(), NumRegs);
- unsigned RegResult = State.AllocateReg(RegList);
- if (RegResult == AMDGPU::NoRegister)
- return false;
-
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, RegResult, LocVT, LocInfo));
- return true;
-}
-
-static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- switch (LocVT.SimpleTy) {
- case MVT::i64:
- case MVT::f64:
- case MVT::v2i32:
- case MVT::v2f32:
- case MVT::v4i16:
- case MVT::v4f16: {
- // Up to SGPR0-SGPR105
- return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
- &AMDGPU::SGPR_64RegClass, 53);
- }
- default:
- return false;
- }
-}
-
-// Allocate up to VGPR31.
-//
-// TODO: Since there are no VGPR alignent requirements would it be better to
-// split into individual scalar registers?
-static bool allocateVGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- switch (LocVT.SimpleTy) {
- case MVT::i64:
- case MVT::f64:
- case MVT::v2i32:
- case MVT::v2f32:
- case MVT::v4i16:
- case MVT::v4f16: {
- return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
- &AMDGPU::VReg_64RegClass, 31);
- }
- case MVT::v4i32:
- case MVT::v4f32:
- case MVT::v2i64:
- case MVT::v2f64: {
- return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
- &AMDGPU::VReg_128RegClass, 29);
- }
- case MVT::v8i32:
- case MVT::v8f32: {
- return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
- &AMDGPU::VReg_256RegClass, 25);
-
- }
- case MVT::v16i32:
- case MVT::v16f32: {
- return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
- &AMDGPU::VReg_512RegClass, 17);
-
- }
- default:
- return false;
- }
-}
-
#include "AMDGPUGenCallingConv.inc"
// Find a larger type to do a load / store of a vector with.
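All three deleted helpers reduced to the same pattern: take the first NumRegs registers of a 64/128/256/512-bit tuple register class and let CCState hand out the first one still free. A simplified standalone illustration of that pattern, using plain string containers in place of TargetRegisterClass and CCState (not the LLVM API):

#include <cstdio>
#include <set>
#include <string>
#include <vector>

// Simplified stand-in for State.AllocateReg(makeArrayRef(RC->begin(), NumRegs)):
// return the first register among the first NumRegs entries that is not yet
// used, or an empty string if they are all taken.
static std::string allocateFromClass(const std::vector<std::string> &RegClass,
                                     unsigned NumRegs,
                                     std::set<std::string> &Used) {
  for (unsigned I = 0; I < NumRegs && I < RegClass.size(); ++I)
    if (Used.insert(RegClass[I]).second)
      return RegClass[I];
  return "";
}

int main() {
  // Toy SGPR_64 class: SGPR0_SGPR1, SGPR2_SGPR3, ... (53 pairs covers up to SGPR105).
  std::vector<std::string> SGPR64;
  for (unsigned I = 0; I < 53; ++I)
    SGPR64.push_back("SGPR" + std::to_string(2 * I) + "_SGPR" + std::to_string(2 * I + 1));

  std::set<std::string> Used;
  for (int Arg = 0; Arg < 3; ++Arg)
    std::printf("i64 arg %d -> %s\n", Arg, allocateFromClass(SGPR64, 53, Used).c_str());
  return 0;
}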
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index db0782e2bf3..b8c7bd648f4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -768,19 +768,22 @@ bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- // TODO: Consider splitting all arguments into 32-bit pieces.
- if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+ if (CC == CallingConv::AMDGPU_KERNEL)
+ return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+
+ if (VT.isVector()) {
EVT ScalarVT = VT.getScalarType();
unsigned Size = ScalarVT.getSizeInBits();
if (Size == 32)
return ScalarVT.getSimpleVT();
- if (Size == 64)
+ if (Size > 32)
return MVT::i32;
if (Size == 16 && Subtarget->has16BitInsts())
return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
- }
+ } else if (VT.getSizeInBits() > 32)
+ return MVT::i32;
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
@@ -788,7 +791,10 @@ MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+ if (CC == CallingConv::AMDGPU_KERNEL)
+ return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
+
+ if (VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
EVT ScalarVT = VT.getScalarType();
unsigned Size = ScalarVT.getSizeInBits();
@@ -796,12 +802,13 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
if (Size == 32)
return NumElts;
- if (Size == 64)
- return 2 * NumElts;
+ if (Size > 32)
+ return NumElts * ((Size + 31) / 32);
if (Size == 16 && Subtarget->has16BitInsts())
- return (VT.getVectorNumElements() + 1) / 2;
- }
+ return (NumElts + 1) / 2;
+ } else if (VT.getSizeInBits() > 32)
+ return (VT.getSizeInBits() + 31) / 32;
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
@@ -821,10 +828,10 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
return NumIntermediates;
}
- if (Size == 64) {
+ if (Size > 32) {
RegisterVT = MVT::i32;
IntermediateVT = RegisterVT;
- NumIntermediates = 2 * NumElts;
+ NumIntermediates = NumElts * ((Size + 31) / 32);
return NumIntermediates;
}
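The net effect of these hunks is that any scalar or vector element wider than 32 bits is now broken into ceil(Size / 32) i32 registers, while 16-bit elements keep packing pairwise into v2i16/v2f16 when the subtarget has 16-bit instructions. A small standalone check of that arithmetic (a sketch assuming has16BitInsts, not the LLVM API):

#include <cstdio>

// Mirrors the updated getNumRegistersForCallingConv rules for vector types:
// 16-bit elements pack two per register, 32-bit elements get one register
// each, and wider elements split into ceil(ScalarBits / 32) i32 pieces apiece.
static unsigned numRegsForVector(unsigned NumElts, unsigned ScalarBits) {
  if (ScalarBits == 16)
    return (NumElts + 1) / 2;                  // packed v2i16 / v2f16
  if (ScalarBits == 32)
    return NumElts;                            // one 32-bit register per element
  return NumElts * ((ScalarBits + 31) / 32);   // e.g. 64-bit elements -> 2 pieces each
}

int main() {
  std::printf("i64    -> %u x i32\n", (64u + 31u) / 32u);       // scalar path: 2
  std::printf("v2i64  -> %u regs\n", numRegsForVector(2, 64));  // 4
  std::printf("v3f32  -> %u regs\n", numRegsForVector(3, 32));  // 3
  std::printf("v4f16  -> %u regs\n", numRegsForVector(4, 16));  // 2
  std::printf("v8i32  -> %u regs\n", numRegsForVector(8, 32));  // 8
  return 0;
}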