diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-07-14 18:20:33 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-07-14 18:20:33 +0000 |
commit | 24692118bad46f907f80086f0e83c19f3905f5c3 (patch) | |
tree | 3b27c6aab3027b6172c5f35f25bfe9340c558025 /llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll | |
parent | 5c6efed3f5cc85b55ea87e774b6a7701fe1d13df (diff) | |
download | bcm5719-llvm-24692118bad46f907f80086f0e83c19f3905f5c3.tar.gz bcm5719-llvm-24692118bad46f907f80086f0e83c19f3905f5c3.zip |
AMDGPU: Avoid using 64-bit shift for i64 (shl x, 32)
This can be done using only moves, which in theory
will optimize better later.
Although this transform increases the instruction count,
it should be code size / cycle count neutral in the worst
VALU case. It also seems to slightly improve a couple
of testcases due to other DAG combines this exposes.
This is probably slightly worse for the SALU case, so
it might be better to handle this during moveToVALU,
although then you lose some simplifications, such as
the load-width reduction in the simple testcase.
llvm-svn: 242177
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll | 5 |
1 files changed, 3 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll
index eae095eb844..a3ae3c3aea1 100644
--- a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll
@@ -3,8 +3,9 @@ declare i32 @llvm.SI.tid() readnone
 ; SI-LABEL: {{^}}test_array_ptr_calc:
-; SI: v_mul_lo_i32
-; SI: v_mul_hi_i32
+; SI-DAG: v_mul_lo_i32
+; SI-DAG: v_mul_hi_i32
+; SI: s_endpgm
 define void @test_array_ptr_calc(i32 addrspace(1)* noalias %out, [1025 x i32] addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
   %tid = call i32 @llvm.SI.tid() readnone
   %a_ptr = getelementptr [1025 x i32], [1025 x i32] addrspace(1)* %inA, i32 %tid, i32 0 |