| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-09-17 15:44:16 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-09-17 15:44:16 +0000 |
| commit | d99ef1144b38f41ca2e68bf666490110237ec2bf | |
| tree | 12d8684a46294d1baefee1692c486464eccde361 /llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll | |
| parent | 0baa19004f5e2ec830d94256825c715f708208a8 | |
AMDGPU: Push bitcasts through build_vector
This reduces the number of copies and reg_sequences when using fp constant vectors. This significantly reduces the code size in local-stack-alloc-bug.ll.
llvm-svn: 281822
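
For context on the transformation the commit title names, below is a minimal, hypothetical sketch of what "pushing a bitcast through a build_vector" looks like as a SelectionDAG fold: rewrite (bitcast (build_vector x0, ..., xN)) into (build_vector (bitcast x0), ..., (bitcast xN)), so each scalar bitcast can constant-fold and lanes shared between vectors can be CSEd. This is not the actual patch: the function name and the narrow same-element-width legality check are assumptions, and the committed change must also cover the width-changing casts (e.g. v4i64 -> v8f32) exercised by the test below.

```cpp
// Hypothetical sketch, not the committed code: fold
//   (bitcast (build_vector x0, ..., xN))
//     -> (build_vector (bitcast x0), ..., (bitcast xN))
// when source and destination vectors have the same element count and
// element width (e.g. v8i32 -> v8f32), so each scalar bitcast can be
// constant-folded and identical lanes can be CSEd across vectors.
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue pushBitcastThroughBuildVector(SDNode *N, SelectionDAG &DAG) {
  // N is an ISD::BITCAST node; Src is the value being reinterpreted.
  SDValue Src = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  if (Src.getOpcode() != ISD::BUILD_VECTOR || !DstVT.isVector())
    return SDValue();

  EVT SrcVT = Src.getValueType();
  EVT DstEltVT = DstVT.getVectorElementType();
  // Only the easy lane-preserving case; width-changing casts (e.g.
  // v4i64 -> v8f32) would need extra splitting logic first.
  if (DstVT.getVectorNumElements() != SrcVT.getVectorNumElements() ||
      DstEltVT.getSizeInBits() != SrcVT.getVectorElementType().getSizeInBits())
    return SDValue();

  // Bitcast each scalar element, then rebuild the vector in the new type.
  SDLoc DL(N);
  SmallVector<SDValue, 8> Elts;
  for (unsigned I = 0, E = Src.getNumOperands(); I != E; ++I)
    Elts.push_back(DAG.getNode(ISD::BITCAST, DL, DstEltVT, Src.getOperand(I)));
  return DAG.getNode(ISD::BUILD_VECTOR, DL, DstVT, Elts);
}
```

A fold like this is typically registered in a target's PerformDAGCombine hook for ISD::BITCAST nodes; once the constants are bitcast per lane, the shared lanes (the repeated 7s in the tests below) become identical nodes that CSE naturally.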
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll | 69 |
1 file changed, 69 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll b/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
new file mode 100644
index 00000000000..2482fa761b1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
@@ -0,0 +1,69 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; The bitcasts should be pushed through the build_vector so the vectors can
+; be broken down and the shared components can be CSEd.
+
+; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v8f32:
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+define void @store_bitcast_constant_v8i32_to_v8f32(<8 x float> addrspace(1)* %out, <8 x i32> %vec) {
+  %vec0.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 8> to <8 x float>
+  store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out
+
+  %vec1.bc = bitcast <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 9> to <8 x float>
+  store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v8f32:
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+define void @store_bitcast_constant_v4i64_to_v8f32(<8 x float> addrspace(1)* %out, <4 x i64> %vec) {
+  %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <8 x float>
+  store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out
+
+  %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <8 x float>
+  store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_bitcast_constant_v4i64_to_v4f64:
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+define void @store_bitcast_constant_v4i64_to_v4f64(<4 x double> addrspace(1)* %out, <4 x i64> %vec) {
+  %vec0.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 8> to <4 x double>
+  store volatile <4 x double> %vec0.bc, <4 x double> addrspace(1)* %out
+
+  %vec1.bc = bitcast <4 x i64> <i64 7, i64 7, i64 7, i64 9> to <4 x double>
+  store volatile <4 x double> %vec1.bc, <4 x double> addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}store_bitcast_constant_v8i32_to_v16i16:
+; GCN: buffer_store_dwordx4
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+; GCN-NOT: v_mov_b32
+; GCN: buffer_store_dwordx4
+define void @store_bitcast_constant_v8i32_to_v16i16(<8 x float> addrspace(1)* %out, <16 x i16> %vec) {
+  %vec0.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 8> to <8 x float>
+  store volatile <8 x float> %vec0.bc, <8 x float> addrspace(1)* %out
+
+  %vec1.bc = bitcast <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 9> to <8 x float>
+  store volatile <8 x float> %vec1.bc, <8 x float> addrspace(1)* %out
+  ret void
+}