| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-09-17 15:44:16 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-09-17 15:44:16 +0000 |
| commit | d99ef1144b38f41ca2e68bf666490110237ec2bf | |
| tree | 12d8684a46294d1baefee1692c486464eccde361 /llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll | |
| parent | 0baa19004f5e2ec830d94256825c715f708208a8 | |
AMDGPU: Push bitcasts through build_vector
This reduces the number of copies and reg_sequences when using fp constant vectors. This significantly reduces the code size in local-stack-alloc-bug.ll.
llvm-svn: 281822
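
For context on the transformation the commit title names, below is a minimal, hypothetical sketch of what "pushing a bitcast through a build_vector" looks like as a SelectionDAG fold: rewrite (bitcast (build_vector x0, ..., xN)) into (build_vector (bitcast x0), ..., (bitcast xN)), so each scalar bitcast can constant-fold and lanes shared between vectors can be CSEd. This is not the actual patch: the function name and the narrow same-element-width legality check are assumptions, and the committed change must also cover the width-changing casts (e.g. v4i64 -> v8f32) exercised by the test below.

```cpp
// Hypothetical sketch, not the committed code: fold
//   (bitcast (build_vector x0, ..., xN))
//     -> (build_vector (bitcast x0), ..., (bitcast xN))
// when source and destination vectors have the same element count and
// element width (e.g. v8i32 -> v8f32), so each scalar bitcast can be
// constant-folded and identical lanes can be CSEd across vectors.
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue pushBitcastThroughBuildVector(SDNode *N, SelectionDAG &DAG) {
  // N is an ISD::BITCAST node; Src is the value being reinterpreted.
  SDValue Src = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  if (Src.getOpcode() != ISD::BUILD_VECTOR || !DstVT.isVector())
    return SDValue();

  EVT SrcVT = Src.getValueType();
  EVT DstEltVT = DstVT.getVectorElementType();
  // Only the easy lane-preserving case; width-changing casts (e.g.
  // v4i64 -> v8f32) would need extra splitting logic first.
  if (DstVT.getVectorNumElements() != SrcVT.getVectorNumElements() ||
      DstEltVT.getSizeInBits() != SrcVT.getVectorElementType().getSizeInBits())
    return SDValue();

  // Bitcast each scalar element, then rebuild the vector in the new type.
  SDLoc DL(N);
  SmallVector<SDValue, 8> Elts;
  for (unsigned I = 0, E = Src.getNumOperands(); I != E; ++I)
    Elts.push_back(DAG.getNode(ISD::BITCAST, DL, DstEltVT, Src.getOperand(I)));
  return DAG.getNode(ISD::BUILD_VECTOR, DL, DstVT, Elts);
}
```

A fold like this is typically registered in a target's PerformDAGCombine hook for ISD::BITCAST nodes; once the constants are bitcast per lane, the shared lanes (the repeated 7s in the tests below) become identical nodes that CSE naturally.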
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll | 69 |
1 file changed, 69 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll b/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
new file mode 100644
index 00000000000..2482fa761b1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
@@ -0,0 +1,69 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; The bitcasts should be pushed through the build_vector so the vectors can
+; be broken down and the shared components can be CSEd.
+
+; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v8f32:
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+define void @store_bitcast_constant_v8i32_to_v8f32(<8 x float> addrspace(1)* %out, <8 x i32> %vec) {
+  %vec0.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8> to <8 x float>
+  store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out
+
+  %vec1.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 9> to <8 x float>
+  store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v8f32:
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+define void @store_bitcast_constant_v4i64_to_v8f32(<8 x float> addrspace(1)* %out, <4 x i64> %vec) {
+  %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <8 x float>
+  store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out
+
+  %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <8 x float>
+  store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v4f64:
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+define void @store_bitcast_constant_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %vec) {
+  %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <4 x double>
+  store volatile <4 x double> %vec0.bc, <4 x double> addrspace(1)* %out
+
+  %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <4 x double>
+  store volatile <4 x double> %vec1.bc, <4 x double> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v16i16:
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+define void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> addrspace(1)* %out, <16 x i16> %vec) {
+  %vec0.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 8> to <8 x float>
+  store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out
+
+  %vec1.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 9> to <8 x float>
+  store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
+  ret void
+}