Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td    |  7
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp  | 74
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp      | 29
3 files changed, 18 insertions, 92 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 3688cd77542..be133b19c26 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -35,11 +35,6 @@ def CC_SI : CallingConv<[
     SGPR104, SGPR105
   ]>>>,
 
-  // We have no way of referring to the generated register tuples
-  // here, so use a custom function.
-  CCIfInReg<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,
-  CCIfByVal<CCIfType<[i64], CCCustom<"allocateSGPRTuple">>>,
-
   // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
   CCIfNotInReg<CCIfType<[f32, i32, f16, v2i16, v2f16] , CCAssignToReg<[
     VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
@@ -138,7 +133,6 @@ def CC_AMDGPU_Func : CallingConv<[
     VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
     VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
     VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
-  CCIfType<[i64, f64, v2i32, v2f32, v3i32, v3f32, v4i32, v4f32, v5i32, v5f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>,
   CCIfType<[i32, f32, v2i16, v2f16, i16, f16, i1], CCAssignToStack<4, 4>>,
   CCIfType<[i64, f64, v2i32, v2f32], CCAssignToStack<8, 4>>,
   CCIfType<[v3i32, v3f32], CCAssignToStack<12, 4>>,
@@ -157,7 +151,6 @@ def RetCC_AMDGPU_Func : CallingConv<[
     VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
     VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
     VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
-  CCIfType<[i64, f64, v2i32, v2f32, v4i32, v4f32, v8i32, v8f32, v16i32, v16f32, v2i64, v2f64, v4i16, v4f16], CCCustom<"allocateVGPRTuple">>
 ]>;
 
 def CC_AMDGPU : CallingConv<[
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 39016ed3719..73aff33f2e0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -39,80 +39,6 @@
 #include "llvm/Support/KnownBits.h"
 
 using namespace llvm;
-static bool allocateCCRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
-                           CCValAssign::LocInfo LocInfo,
-                           ISD::ArgFlagsTy ArgFlags, CCState &State,
-                           const TargetRegisterClass *RC,
-                           unsigned NumRegs) {
-  ArrayRef<MCPhysReg> RegList = makeArrayRef(RC->begin(), NumRegs);
-  unsigned RegResult = State.AllocateReg(RegList);
-  if (RegResult == AMDGPU::NoRegister)
-    return false;
-
-  State.addLoc(CCValAssign::getReg(ValNo, ValVT, RegResult, LocVT, LocInfo));
-  return true;
-}
-
-static bool allocateSGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
-                              CCValAssign::LocInfo LocInfo,
-                              ISD::ArgFlagsTy ArgFlags, CCState &State) {
-  switch (LocVT.SimpleTy) {
-  case MVT::i64:
-  case MVT::f64:
-  case MVT::v2i32:
-  case MVT::v2f32:
-  case MVT::v4i16:
-  case MVT::v4f16: {
-    // Up to SGPR0-SGPR105
-    return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
-                          &AMDGPU::SGPR_64RegClass, 53);
-  }
-  default:
-    return false;
-  }
-}
-
-// Allocate up to VGPR31.
-//
-// TODO: Since there are no VGPR alignent requirements would it be better to
-// split into individual scalar registers?
-static bool allocateVGPRTuple(unsigned ValNo, MVT ValVT, MVT LocVT,
-                              CCValAssign::LocInfo LocInfo,
-                              ISD::ArgFlagsTy ArgFlags, CCState &State) {
-  switch (LocVT.SimpleTy) {
-  case MVT::i64:
-  case MVT::f64:
-  case MVT::v2i32:
-  case MVT::v2f32:
-  case MVT::v4i16:
-  case MVT::v4f16: {
-    return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
-                          &AMDGPU::VReg_64RegClass, 31);
-  }
-  case MVT::v4i32:
-  case MVT::v4f32:
-  case MVT::v2i64:
-  case MVT::v2f64: {
-    return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
-                          &AMDGPU::VReg_128RegClass, 29);
-  }
-  case MVT::v8i32:
-  case MVT::v8f32: {
-    return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
-                          &AMDGPU::VReg_256RegClass, 25);
-
-  }
-  case MVT::v16i32:
-  case MVT::v16f32: {
-    return allocateCCRegs(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State,
-                          &AMDGPU::VReg_512RegClass, 17);
-
-  }
-  default:
-    return false;
-  }
-}
-
 #include "AMDGPUGenCallingConv.inc"
 
 // Find a larger type to do a load / store of a vector with.
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index db0782e2bf3..b8c7bd648f4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -768,19 +768,22 @@ bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
 MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                     CallingConv::ID CC,
                                                     EVT VT) const {
-  // TODO: Consider splitting all arguments into 32-bit pieces.
-  if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+  if (CC == CallingConv::AMDGPU_KERNEL)
+    return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+
+  if (VT.isVector()) {
     EVT ScalarVT = VT.getScalarType();
     unsigned Size = ScalarVT.getSizeInBits();
     if (Size == 32)
       return ScalarVT.getSimpleVT();
 
-    if (Size == 64)
+    if (Size > 32)
       return MVT::i32;
 
     if (Size == 16 && Subtarget->has16BitInsts())
       return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
-  }
+  } else if (VT.getSizeInBits() > 32)
+    return MVT::i32;
 
   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
 }
@@ -788,7 +791,10 @@ MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
 unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                          CallingConv::ID CC,
                                                          EVT VT) const {
-  if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+  if (CC == CallingConv::AMDGPU_KERNEL)
+    return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
+
+  if (VT.isVector()) {
     unsigned NumElts = VT.getVectorNumElements();
     EVT ScalarVT = VT.getScalarType();
     unsigned Size = ScalarVT.getSizeInBits();
@@ -796,12 +802,13 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
     if (Size == 32)
       return NumElts;
 
-    if (Size == 64)
-      return 2 * NumElts;
+    if (Size > 32)
+      return NumElts * ((Size + 31) / 32);
 
     if (Size == 16 && Subtarget->has16BitInsts())
-      return (VT.getVectorNumElements() + 1) / 2;
-  }
+      return (NumElts + 1) / 2;
+  } else if (VT.getSizeInBits() > 32)
+    return (VT.getSizeInBits() + 31) / 32;
 
   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
 }
@@ -821,10 +828,10 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
       return NumIntermediates;
     }
 
-    if (Size == 64) {
+    if (Size > 32) {
       RegisterVT = MVT::i32;
       IntermediateVT = RegisterVT;
-      NumIntermediates = 2 * NumElts;
+      NumIntermediates = NumElts * ((Size + 31) / 32);
       return NumIntermediates;
     }
 
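Note on the arithmetic in the SIISelLowering.cpp hunks: for non-kernel calling conventions, any argument wider than 32 bits is now described as a run of 32-bit pieces, so a scalar takes ceil(SizeInBits / 32) registers and a vector takes NumElts * ceil(EltSizeInBits / 32). The following is a minimal standalone sketch of that rounding rule, for illustration only (plain C++, not LLVM API; the helper names are made up):

#include <cstdio>

// Round-up rule from the hunks above: arguments wider than 32 bits are
// split into 32-bit pieces. A scalar needs ceil(SizeInBits / 32) registers,
// a vector of >32-bit elements needs NumElts * ceil(EltSizeInBits / 32).
static unsigned piecesForScalar(unsigned SizeInBits) {
  return (SizeInBits + 31) / 32;
}

static unsigned piecesForVector(unsigned NumElts, unsigned EltSizeInBits) {
  return NumElts * ((EltSizeInBits + 31) / 32);
}

int main() {
  std::printf("i64   -> %u x i32\n", piecesForScalar(64));     // 2
  std::printf("i128  -> %u x i32\n", piecesForScalar(128));    // 4
  std::printf("v2f64 -> %u x i32\n", piecesForVector(2, 64));  // 4
  std::printf("v3i64 -> %u x i32\n", piecesForVector(3, 64));  // 6
  return 0;
}

Because every piece is a plain 32-bit value, the TableGen-generated calling convention tables can assign the pieces to individual registers or stack slots directly, which is presumably why the CCCustom allocateSGPRTuple/allocateVGPRTuple handlers (which existed only because the .td file could not name the generated register tuples) are no longer needed.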

