diff options
author | Artur Pilipenko <apilipenko@azulsystems.com> | 2017-02-16 17:07:27 +0000 |
---|---|---|
committer | Artur Pilipenko <apilipenko@azulsystems.com> | 2017-02-16 17:07:27 +0000 |
commit | 85d758299e480ad4b0c924a971dc79b7809b5acd (patch) | |
tree | c5ab9abb102ca868cfdf05c0af50668d2bad780d /llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll | |
parent | fc711b1f4772dd14ce679176e499683ee7c8642f (diff) | |
download | bcm5719-llvm-85d758299e480ad4b0c924a971dc79b7809b5acd.tar.gz bcm5719-llvm-85d758299e480ad4b0c924a971dc79b7809b5acd.zip |
[DAGCombiner] Support {a|s}ext, {a|z|s}ext load nodes in load combine
Resubmit -r295314 with PowerPC and AMDGPU tests updated.
Support {a|s}ext, {a|z|s}ext load nodes as a part of load combine patterns.
Reviewed By: filcab
Differential Revision: https://reviews.llvm.org/D29591
llvm-svn: 295336
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll | 31 |
1 files changed, 16 insertions, 15 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll index 65ac693a4f4..99a3093e033 100644 --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll @@ -1,5 +1,5 @@ -; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=GCN-NO-TONGA %s +; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=GCN-TONGA %s ; FIXME: Broken on evergreen ; FIXME: For some reason the 8 and 16 vectors are being stored as @@ -219,10 +219,7 @@ define void @dynamic_insertelement_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> ; GCN: s_waitcnt -; GCN: buffer_load_ushort -; GCN: buffer_load_ushort -; GCN: buffer_load_ushort -; GCN: buffer_load_ushort +; GCN: buffer_load_dwordx2 ; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, i32 %b) nounwind { @@ -240,8 +237,9 @@ define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> ; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} -; GCN: buffer_load_ubyte -; GCN: buffer_load_ubyte +; GCN-NO-TONGA: buffer_load_ubyte +; GCN-NO-TONGA: buffer_load_ubyte +; GCN-TONGA: buffer_load_ushort ; GCN: buffer_store_short v{{[0-9]+}}, off define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a, i32 %b) nounwind { @@ -261,9 +259,11 @@ define void 
@dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a ; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} -; GCN: buffer_load_ubyte -; GCN: buffer_load_ubyte -; GCN: buffer_load_ubyte +; GCN-NO-TONGA: buffer_load_ubyte +; GCN-NO-TONGA: buffer_load_ubyte +; GCN-NO-TONGA: buffer_load_ubyte +; GCN-TONGA: buffer_load_ushort +; GCN-TONGA: buffer_load_ubyte ; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off ; GCN-DAG: buffer_store_short v{{[0-9]+}}, off @@ -286,10 +286,11 @@ define void @dynamic_insertelement_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> %a ; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}} -; GCN: buffer_load_ubyte -; GCN: buffer_load_ubyte -; GCN: buffer_load_ubyte -; GCN: buffer_load_ubyte +; GCN-NO-TONGA: buffer_load_ubyte +; GCN-NO-TONGA: buffer_load_ubyte +; GCN-NO-TONGA: buffer_load_ubyte +; GCN-NO-TONGA: buffer_load_ubyte +; GCN-TONGA: buffer_load_dword ; GCN: buffer_store_dword v{{[0-9]+}}, off define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, i32 %b) nounwind { |