diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-08-27 17:51:56 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-08-27 17:51:56 +0000 |
| commit | 0c096da02f556dddc3c67a6dc9cf0bed3914aa00 (patch) | |
| tree | cfbbaea718f50dce17140d0af577aa9c3185d618 | |
| parent | 92bfb69a44e9d679a4689ac6368d34de8722d565 (diff) | |
| download | bcm5719-llvm-0c096da02f556dddc3c67a6dc9cf0bed3914aa00.tar.gz bcm5719-llvm-0c096da02f556dddc3c67a6dc9cf0bed3914aa00.zip | |
AMDGPU: Fix crash from inconsistent register types for v3i16/v3f16
This is something of a workaround since computeRegisterProperties
seems to be doing the wrong thing.
llvm-svn: 370086
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll | 89 |
2 files changed, 92 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ae746a0e27b..4bcabaaffa5 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1351,9 +1351,9 @@ bool SITargetLowering::isMemOpUniform(const SDNode *N) const { TargetLoweringBase::LegalizeTypeAction SITargetLowering::getPreferredVectorAction(MVT VT) const { - if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16)) - return TypeSplitVector; - + int NumElts = VT.getVectorNumElements(); + if (NumElts != 1 && VT.getScalarType().bitsLE(MVT::i16)) + return VT.isPow2VectorType() ? TypeSplitVector : TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } diff --git a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll index a0a52fbe33c..019beea0ab1 100644 --- a/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll @@ -165,11 +165,100 @@ bb1: ret { i32, half } %ins1 } +define amdgpu_kernel void @v3i16_registers(i1 %cond) #0 { +; GCN-LABEL: v3i16_registers: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_load_dword s4, s[4:5], 0x0 +; GCN-NEXT: s_mov_b32 s33, s9 +; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s33 +; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0 +; GCN-NEXT: s_mov_b32 s32, s33 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_and_b32 s4, 1, s4 +; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, 1 +; GCN-NEXT: s_and_b64 vcc, exec, s[4:5] +; GCN-NEXT: s_cbranch_vccz BB4_2 +; GCN-NEXT: ; %bb.1: +; GCN-NEXT: s_mov_b32 s4, 0 +; GCN-NEXT: s_mov_b32 s5, s4 +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: s_branch BB4_3 +; GCN-NEXT: BB4_2: ; %if.else +; GCN-NEXT: s_getpc_b64 s[4:5] +; GCN-NEXT: s_add_u32 s4, s4, func_v3i16@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s5, s5, func_v3i16@rel32@hi+4 +; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GCN-NEXT: BB4_3: ; %if.end +; GCN-NEXT: global_store_short v[0:1], v1, off +; GCN-NEXT: global_store_dword v[0:1], v0, off +; GCN-NEXT: s_endpgm +entry: + br i1 %cond, label %if.then, label %if.else + +if.then: ; preds = %entry + br label %if.end + +if.else: ; preds = %entry + %call6 = tail call <3 x i16> @func_v3i16() #0 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %call6.sink = phi <3 x i16> [ %call6, %if.else ], [ undef, %if.then ] + store <3 x i16> %call6.sink, <3 x i16> addrspace(1)* undef + ret void +} + +define amdgpu_kernel void @v3f16_registers(i1 %cond) #0 { +; GCN-LABEL: v3f16_registers: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_load_dword s4, s[4:5], 0x0 +; GCN-NEXT: s_mov_b32 s33, s9 +; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s33 +; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0 +; GCN-NEXT: s_mov_b32 s32, s33 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_and_b32 s4, 1, s4 +; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, 1 +; GCN-NEXT: s_and_b64 vcc, exec, s[4:5] +; GCN-NEXT: s_cbranch_vccz BB5_2 +; GCN-NEXT: ; %bb.1: +; GCN-NEXT: s_mov_b32 s4, 0 +; GCN-NEXT: s_mov_b32 s5, s4 +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: v_mov_b32_e32 v1, s5 +; GCN-NEXT: s_branch BB5_3 +; GCN-NEXT: BB5_2: ; %if.else +; GCN-NEXT: s_getpc_b64 s[4:5] +; GCN-NEXT: s_add_u32 s4, s4, func_v3f16@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s5, s5, func_v3f16@rel32@hi+4 +; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GCN-NEXT: BB5_3: ; %if.end +; GCN-NEXT: global_store_short v[0:1], v1, off +; GCN-NEXT: global_store_dword v[0:1], v0, off +; GCN-NEXT: s_endpgm +entry: + br i1 %cond, label %if.then, label %if.else + +if.then: ; preds = %entry + br label %if.end + +if.else: ; preds = %entry + %call6 = tail call <3 x half> @func_v3f16() #0 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %call6.sink = phi <3 x half> [ %call6, %if.else ], [ undef, %if.then ] + store <3 x half> %call6.sink, <3 x half> addrspace(1)* undef + ret void +} declare hidden <2 x float> @func_v2f32() #0 declare hidden <3 x float> @func_v3f32() #0 declare hidden <4 x float> @func_v4f32() #0 declare hidden <4 x half> @func_v4f16() #0 +declare hidden <3 x i16> @func_v3i16() +declare hidden <3 x half> @func_v3f16() declare hidden { <4 x i32>, <4 x half> } @func_struct() #0 |

