diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-11-24 12:05:03 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-11-24 12:05:03 +0000 |
commit | 4d801cd357c74bb7c2a60fedf4030b9fb5b4827f (patch) | |
tree | ab89b79da213d3d358dbdaf7b6ba19ba2fe2d994 /llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll | |
parent | 9d0f44bf8af57cbe992edada1a5351881b1388b2 (diff) | |
download | bcm5719-llvm-4d801cd357c74bb7c2a60fedf4030b9fb5b4827f.tar.gz bcm5719-llvm-4d801cd357c74bb7c2a60fedf4030b9fb5b4827f.zip |
AMDGPU: Split x8 and x16 vector loads instead of scalarize
The one regression in the builtin tests is in the read2 test which now
(again) has many extra copies, but this should be solved once the pass
is replaced with a DAG combine.
llvm-svn: 253974
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll | 10 |
1 files changed, 2 insertions, 8 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
index cd554ba256b..834922c62cb 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
@@ -137,14 +137,8 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8>
 ; SI-DAG: v_cvt_f32_ubyte0_e32 v{{[0-9]+}}, v[[HILOAD]]
 ; SI-NOT: bfe
 ; SI-NOT: lshr
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
-; SI: buffer_store_dword
+; SI: buffer_store_dwordx4
+; SI: buffer_store_dwordx4
 define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
   %load = load <8 x i8>, <8 x i8> addrspace(1)* %in, align 8
   %cvt = uitofp <8 x i8> %load to <8 x float>