| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-07-19 13:57:44 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-07-19 13:57:44 +0000 |
| commit | 1022c0dfde51a32c31cb912ccf8256e60debebfd (patch) | |
| tree | 2b9ac120eff33333517f204fd68c9b81f7e66398 /llvm/lib/Target/AMDGPU/SIISelLowering.cpp | |
| parent | 8bb8915d43fe687237f31014f34fc6f2f79fb5ff (diff) | |
AMDGPU: Decompose all values to 32-bit pieces for calling conventions
This is the more natural lowering, and it presents more opportunities to
reduce 64-bit ops to 32-bit ones.
This should also help avoid the issues graphics shaders have had with
64-bit values, and it simplifies argument lowering in GlobalISel.
llvm-svn: 366578
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 29 |
1 file changed, 18 insertions, 11 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index db0782e2bf3..b8c7bd648f4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -768,19 +768,22 @@ bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
 MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                     CallingConv::ID CC,
                                                     EVT VT) const {
-  // TODO: Consider splitting all arguments into 32-bit pieces.
-  if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+  if (CC == CallingConv::AMDGPU_KERNEL)
+    return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
+
+  if (VT.isVector()) {
     EVT ScalarVT = VT.getScalarType();
     unsigned Size = ScalarVT.getSizeInBits();
     if (Size == 32)
       return ScalarVT.getSimpleVT();
 
-    if (Size == 64)
+    if (Size > 32)
       return MVT::i32;
 
     if (Size == 16 && Subtarget->has16BitInsts())
       return VT.isInteger() ? MVT::v2i16 : MVT::v2f16;
-  }
+  } else if (VT.getSizeInBits() > 32)
+    return MVT::i32;
 
   return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
 }
@@ -788,7 +791,10 @@ MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
 unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                          CallingConv::ID CC,
                                                          EVT VT) const {
-  if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
+  if (CC == CallingConv::AMDGPU_KERNEL)
+    return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
+
+  if (VT.isVector()) {
     unsigned NumElts = VT.getVectorNumElements();
     EVT ScalarVT = VT.getScalarType();
     unsigned Size = ScalarVT.getSizeInBits();
@@ -796,12 +802,13 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
     if (Size == 32)
       return NumElts;
 
-    if (Size == 64)
-      return 2 * NumElts;
+    if (Size > 32)
+      return NumElts * ((Size + 31) / 32);
 
     if (Size == 16 && Subtarget->has16BitInsts())
-      return (VT.getVectorNumElements() + 1) / 2;
-  }
+      return (NumElts + 1) / 2;
+  } else if (VT.getSizeInBits() > 32)
+    return (VT.getSizeInBits() + 31) / 32;
 
   return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
 }
@@ -821,10 +828,10 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
       return NumIntermediates;
     }
 
-    if (Size == 64) {
+    if (Size > 32) {
       RegisterVT = MVT::i32;
       IntermediateVT = RegisterVT;
-      NumIntermediates = 2 * NumElts;
+      NumIntermediates = NumElts * ((Size + 31) / 32);
       return NumIntermediates;
     }
```
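For illustration only, here is a minimal standalone sketch of the register-count arithmetic the new code uses: any scalar wider than 32 bits is decomposed into ceil(Size / 32) i32 pieces, and a vector needs that many pieces per element. This is not part of the patch, and the helper names are hypothetical rather than LLVM API.

```cpp
// Hypothetical standalone sketch of the rounding logic from the patch:
// NumElts * ((Size + 31) / 32), i.e. round each scalar up to whole
// 32-bit pieces. Not LLVM code; helper names are made up.
#include <cassert>
#include <cstdio>

// 32-bit pieces needed for one scalar of ScalarBits bits.
static unsigned piecesPerScalar(unsigned ScalarBits) {
  return (ScalarBits + 31) / 32;
}

// Registers used for a value with NumElts elements of ScalarBits bits
// in a non-kernel calling convention after this change (assuming
// 16-bit instructions are available for the 16-bit case).
static unsigned numRegisters(unsigned NumElts, unsigned ScalarBits) {
  if (ScalarBits == 32)
    return NumElts;                               // one i32 per element
  if (ScalarBits > 32)
    return NumElts * piecesPerScalar(ScalarBits); // split wide scalars
  return (NumElts + 1) / 2;                       // 16-bit elements pack in pairs
}

int main() {
  assert(numRegisters(1, 64) == 2);  // i64   -> 2 x i32
  assert(numRegisters(3, 64) == 6);  // v3i64 -> 6 x i32
  assert(numRegisters(1, 128) == 4); // i128  -> 4 x i32
  assert(numRegisters(4, 16) == 2);  // v4f16 -> 2 x v2f16
  std::printf("register-count checks passed\n");
  return 0;
}
```

Splitting everything into i32 pieces this way is what gives later passes the chance to shrink 64-bit operations on the individual pieces to 32-bit ones, as the commit message notes.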

