diff options
author | Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> | 2016-10-07 14:22:58 +0000 |
---|---|---|
committer | Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> | 2016-10-07 14:22:58 +0000 |
commit | f74fc60a7d31360f97c30b169ed1a3d9786e6e78 (patch) | |
tree | 18965102e9bd253a3db97dd30a42ba67d0d5fd54 /llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll | |
parent | d99ebc03f47332ca0290dbf6d39d1b4f1010b7b1 (diff) | |
download | bcm5719-llvm-f74fc60a7d31360f97c30b169ed1a3d9786e6e78.tar.gz bcm5719-llvm-f74fc60a7d31360f97c30b169ed1a3d9786e6e78.zip |
[AMDGPU] Promote uniform (i1, i16] operations to i32
Differential Revision: https://reviews.llvm.org/D25302
llvm-svn: 283555
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll | 14 |
1 files changed, 10 insertions, 4 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll index dcd48c97434..36275ff12cf 100644 --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -1,6 +1,9 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone +declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone + ; SI-LABEL: {{^}}load_i8_to_f32: ; SI: buffer_load_ubyte [[LOADREG:v[0-9]+]], ; SI-NOT: bfe @@ -80,9 +83,10 @@ define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out ret void } +; FIXME: Need to handle non-uniform case for function below (load without gep). ; Instructions still emitted to repack bytes for add use. ; SI-LABEL: {{^}}load_v4i8_to_v4f32_2_uses: -; SI: buffer_load_dword +; SI: {{buffer|flat}}_load_dword ; SI-DAG: v_cvt_f32_ubyte0_e32 ; SI-DAG: v_cvt_f32_ubyte1_e32 ; SI-DAG: v_cvt_f32_ubyte2_e32 @@ -96,12 +100,14 @@ define void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out ; SI-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff00, ; SI-DAG: v_add_i32 -; SI: buffer_store_dwordx4 -; SI: buffer_store_dword +; SI: {{buffer|flat}}_store_dwordx4 +; SI: {{buffer|flat}}_store_dword ; SI: s_endpgm define void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind { - %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4 + %tid.x = call i32 @llvm.amdgcn.workitem.id.x() + %in.ptr = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x + %load = load <4 x i8>, <4 x i8> addrspace(1)* %in.ptr, align 4 %cvt = uitofp <4 x i8> %load to <4 x float> store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 %add = add <4 x i8> %load, <i8 9, i8 9, i8 9, i8 9> ; Second use of %load |