author:    Matt Arsenault <Matthew.Arsenault@amd.com>  2018-07-31 19:29:04 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com>  2018-07-31 19:29:04 +0000
commit:    feedabfde7236180988d7108fcdd713405461268
tree:      d4e6fab59920a6945220266167067a425b62f6d4
parent:    05220a900cf8309fcdbe3e82344c65081b730374
AMDGPU: Break 64-bit arguments into 32-bit pieces
llvm-svn: 338421
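
In practice, each 64-bit scalar argument or 64-bit vector element passed to a non-kernel callee is now decomposed into two consecutive 32-bit registers, low half first. Below is a standalone sketch of that split (plain C++, not part of the patch), using the <2 x i64> immediates from the new test added further down, which land in v0..v3 as 1, 2, 3, 4:

```cpp
#include <cstdint>
#include <cstdio>

// Standalone illustration (not LLVM code): how a 64-bit argument value
// decomposes into the two 32-bit pieces that occupy consecutive VGPRs,
// low half first. The constants are the <2 x i64> immediates from the
// new v2i64 test; they split into v0..v3 = 1, 2, 3, 4.
int main() {
  const uint64_t Elts[] = {8589934593ull, 17179869187ull};
  for (uint64_t V : Elts) {
    uint32_t Lo = static_cast<uint32_t>(V);       // low 32 bits -> first register
    uint32_t Hi = static_cast<uint32_t>(V >> 32); // high 32 bits -> next register
    std::printf("0x%016llx -> lo=%u hi=%u\n",
                static_cast<unsigned long long>(V), Lo, Hi);
  }
  return 0;
}
```

Once the pieces are plain 32-bit values they are also legal v_mov_b32 immediates, which is presumably why the test diff below deletes the old "Immedites should fold directly into v_mov_b32s" FIXME.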
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp        | 19
-rw-r--r--  llvm/test/CodeGen/AMDGPU/call-argument-types.ll  | 50
2 files changed, 59 insertions, 10 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d6647c7fe85..ee3c0289b6d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -701,9 +701,12 @@ MVT SITargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
   if (CC != CallingConv::AMDGPU_KERNEL && VT.isVector()) {
     EVT ScalarVT = VT.getScalarType();
     unsigned Size = ScalarVT.getSizeInBits();
-    if (Size == 32 || Size == 64)
+    if (Size == 32)
       return ScalarVT.getSimpleVT();
 
+    if (Size == 64)
+      return MVT::i32;
+
     if (Size == 16 &&
         Subtarget->has16BitInsts() &&
         isPowerOf2_32(VT.getVectorNumElements()))
@@ -721,9 +724,12 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
     EVT ScalarVT = VT.getScalarType();
     unsigned Size = ScalarVT.getSizeInBits();
 
-    if (Size == 32 || Size == 64)
+    if (Size == 32)
       return NumElts;
 
+    if (Size == 64)
+      return 2 * NumElts;
+
     // FIXME: Fails to break down as we want with v3.
     if (Size == 16 && Subtarget->has16BitInsts() && isPowerOf2_32(NumElts))
       return VT.getVectorNumElements() / 2;
@@ -740,13 +746,20 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
     unsigned NumElts = VT.getVectorNumElements();
     EVT ScalarVT = VT.getScalarType();
     unsigned Size = ScalarVT.getSizeInBits();
-    if (Size == 32 || Size == 64) {
+    if (Size == 32) {
       RegisterVT = ScalarVT.getSimpleVT();
       IntermediateVT = RegisterVT;
       NumIntermediates = NumElts;
       return NumIntermediates;
     }
 
+    if (Size == 64) {
+      RegisterVT = MVT::i32;
+      IntermediateVT = RegisterVT;
+      NumIntermediates = 2 * NumElts;
+      return NumIntermediates;
+    }
+
     // FIXME: We should fix the ABI to be the same on targets without 16-bit
     // support, but unless we can properly handle 3-vectors, it will be still be
     // inconsistent.
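With the new Size == 64 path, all three hooks consistently describe a 64-bit element type as two MVT::i32 registers per element. A hypothetical mirror of that arithmetic (plain C++, not the LLVM API; Breakdown and breakdownFor are made up for illustration):

```cpp
#include <cstdio>

// Hypothetical mirror (not the LLVM API) of the patched breakdown logic:
// for a vector with 64-bit elements, the register type becomes i32 and
// each element contributes two registers.
struct Breakdown {
  const char *RegisterVT; // register type the CC hooks would report
  unsigned NumRegisters;  // total registers consumed
};

static Breakdown breakdownFor(unsigned ScalarBits, unsigned NumElts) {
  if (ScalarBits == 32)
    return {"scalar VT", NumElts};  // unchanged path: one register per element
  if (ScalarBits == 64)
    return {"i32", 2 * NumElts};    // new path: split each element in two
  return {"unhandled", 0};
}

int main() {
  // <3 x double>: previously three 64-bit registers, now six i32s (v0..v5).
  Breakdown B = breakdownFor(64, 3);
  std::printf("<3 x double> -> %u x %s\n", B.NumRegisters, B.RegisterVT);
  return 0;
}
```

For <3 x double> this yields six 32-bit registers, matching the v3f64 test added below.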
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index 599e3595cc9..2cea1414507 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -25,6 +25,8 @@ declare void @external_void_func_f16(half) #0
 declare void @external_void_func_f32(float) #0
 declare void @external_void_func_f64(double) #0
 declare void @external_void_func_v2f32(<2 x float>) #0
+declare void @external_void_func_v2f64(<2 x double>) #0
+declare void @external_void_func_v3f64(<3 x double>) #0
 
 declare void @external_void_func_v2i16(<2 x i16>) #0
 declare void @external_void_func_v2f16(<2 x half>) #0
@@ -274,10 +276,21 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
   ret void
 }
 
+; GCN-LABEL: {{^}}test_call_external_void_func_v2i64_imm:
+; GCN-DAG: v_mov_b32_e32 v0, 1
+; GCN-DAG: v_mov_b32_e32 v1, 2
+; GCN-DAG: v_mov_b32_e32 v2, 3
+; GCN-DAG: v_mov_b32_e32 v3, 4
+; GCN: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
+  call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
+  ret void
+}
+
 ; GCN-LABEL: {{^}}test_call_external_void_func_v3i64:
 ; GCN: buffer_load_dwordx4 v[0:3]
-; GCN: v_mov_b32_e32 v4, s
-; GCN: v_mov_b32_e32 v5, s
+; GCN: v_mov_b32_e32 v4, 1
+; GCN: v_mov_b32_e32 v5, 2
 ; GCN: s_waitcnt
 ; GCN-NEXT: s_swappc_b64
 define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
@@ -288,13 +301,12 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
   ret void
 }
 
-; FIXME: Immedites should fold directly into v_mov_b32s
 ; GCN-LABEL: {{^}}test_call_external_void_func_v4i64:
 ; GCN: buffer_load_dwordx4 v[0:3]
-; GCN-DAG: v_mov_b32_e32 v4, s
-; GCN-DAG: v_mov_b32_e32 v5, s
-; GCN-DAG: v_mov_b32_e32 v6, s
-; GCN-DAG: v_mov_b32_e32 v7, s
+; GCN-DAG: v_mov_b32_e32 v4, 1
+; GCN-DAG: v_mov_b32_e32 v5, 2
+; GCN-DAG: v_mov_b32_e32 v6, 3
+; GCN-DAG: v_mov_b32_e32 v7, 4
 ; GCN: s_waitcnt
 ; GCN-NEXT: s_swappc_b64
 
@@ -342,6 +354,30 @@ define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
   ret void
 }
 
+; GCN-LABEL: {{^}}test_call_external_void_func_v2f64_imm:
+; GCN: v_mov_b32_e32 v0, 0{{$}}
+; GCN: v_mov_b32_e32 v1, 2.0
+; GCN: v_mov_b32_e32 v2, 0{{$}}
+; GCN: v_mov_b32_e32 v3, 0x40100000
+; GCN: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
+  call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_call_external_void_func_v3f64_imm:
+; GCN-DAG: v_mov_b32_e32 v0, 0{{$}}
+; GCN-DAG: v_mov_b32_e32 v1, 2.0
+; GCN-DAG: v_mov_b32_e32 v2, 0{{$}}
+; GCN-DAG: v_mov_b32_e32 v3, 0x40100000
+; GCN-DAG: v_mov_b32_e32 v4, 0{{$}}
+; GCN-DAG: v_mov_b32_e32 v5, 0x40200000
+; GCN-DAG: s_swappc_b64
+define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
+  call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
+  ret void
+}
+
 ; GCN-LABEL: {{^}}test_call_external_void_func_v2i16:
 ; GFX9: buffer_load_dword v0
 ; GFX9-NOT: v0
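
The expected f64 immediates follow from splitting each double's IEEE-754 encoding into 32-bit halves: the low words of 2.0, 4.0, and 8.0 are all zero, and the high words are 0x40000000 (which the assembler prints as the inline constant 2.0), 0x40100000, and 0x40200000. A standalone sketch (not part of the patch) that reproduces those halves:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Standalone check (not part of the patch): the 32-bit halves of the
// double immediates used by the new v2f64/v3f64 tests. The high words
// match the v_mov_b32 operands the FileCheck lines expect; 0x40000000
// is the bit pattern printed as the inline constant 2.0.
int main() {
  const double Vals[] = {2.0, 4.0, 8.0};
  for (double D : Vals) {
    uint64_t Bits;
    std::memcpy(&Bits, &D, sizeof(Bits)); // type-pun without UB
    std::printf("%g -> lo=0x%08x hi=0x%08x\n", D,
                static_cast<uint32_t>(Bits),
                static_cast<uint32_t>(Bits >> 32));
  }
  return 0;
}
```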