summary | refs | log | tree | commit | diff | stats
path: root/llvm/test
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-12-04 22:18:27 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2017-12-04 22:18:27 +0000
commit: 68f05052638e11164bc88cd4022ad78d8ad72306 (patch)
tree: 077e0797317387386a1088fcc6f984d3cc8c98da /llvm/test
parent: e6667ded4d0bf664271b36dd98cc5ca69604ae9e (diff)
download: bcm5719-llvm-68f05052638e11164bc88cd4022ad78d8ad72306.tar.gz
download: bcm5719-llvm-68f05052638e11164bc88cd4022ad78d8ad72306.zip
AMDGPU: Fix creating invalid copy when adjusting dmask
Move the entire optimization to one place. Previously it was possible to adjust dmask without changing the register class of the output instruction, since the two changes were made in separate places. Fix all lane sizes and move all of the optimization into the DAG folding.
llvm-svn: 319705
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll 51
1 files changed, 51 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
new file mode 100644
index 00000000000..d8cf67af7b0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
@@ -0,0 +1,51 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}adjust_writemask_crash_0:
+; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x2
+; GCN-NOT: v1
+; GCN-NOT: v0
+; GCN: buffer_store_dword v0
+define amdgpu_ps void @adjust_writemask_crash_0() #0 { ; test case: only element 1 of the <2 x float> getlod result is consumed
+main_body:
+ %tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) ; i32 3 is presumably the dmask operand -- TODO confirm against the intrinsic definition
+ %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> ; lane 0 <- source element 1; remaining lanes are undef
+ %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
+ %tmp4 = extractelement <4 x float> %tmp3, i32 0 ; lane 0 of the shuffle, i.e. element 1 of the original result
+ store volatile float %tmp4, float addrspace(1)* undef ; volatile store to an undef address; presumably keeps the extracted value live
+ ret void
+}
+
+; GCN-LABEL: {{^}}adjust_writemask_crash_1:
+; GCN: image_get_lod v0, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0x1
+; GCN-NOT: v1
+; GCN-NOT: v0
+; GCN: buffer_store_dword v0
+define amdgpu_ps void @adjust_writemask_crash_1() #0 { ; test case: only element 0 of the <2 x float> getlod result is consumed, via a swapped shuffle
+main_body:
+ %tmp = call <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 3, i1 false, i1 false, i1 false, i1 false, i1 false) ; i32 3 is presumably the dmask operand -- TODO confirm against the intrinsic definition
+ %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
+ %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef> ; lanes 0 and 1 hold source elements 1 and 0 (swapped); lanes 2 and 3 are undef
+ %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
+ %tmp4 = extractelement <4 x float> %tmp3, i32 1 ; lane 1 of the shuffle, i.e. element 0 of the original result
+ store volatile float %tmp4, float addrspace(1)* undef ; volatile store to an undef address; presumably keeps the extracted value live
+ ret void
+}
+
+define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 { ; NOTE(review): no FileCheck directives precede this function; it appears to be covered only by -verify-machineinstrs on the RUN line
+main_body:
+ %tmp = call <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 5, i1 false, i1 false, i1 false, i1 false, i1 false) ; <4 x float> result variant; i32 5 is presumably the dmask operand -- TODO confirm against the intrinsic definition
+ %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
+ %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> ; lane 0 <- source element 1; remaining lanes are undef
+ %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
+ %tmp4 = extractelement <4 x float> %tmp3, i32 0 ; lane 0 of the shuffle, i.e. element 1 of the original result
+ store volatile float %tmp4, float addrspace(1)* undef ; volatile store to an undef address; presumably keeps the extracted value live
+ ret void
+}
+
+
+declare <2 x float> @llvm.amdgcn.image.getlod.v2f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 ; variant returning <2 x float>, called by the crash_0 and crash_1 tests
+declare <4 x float> @llvm.amdgcn.image.getlod.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1 ; variant returning <4 x float>, called by the crash_0_v4 test
+
+attributes #0 = { nounwind } ; applied to the three test functions
+attributes #1 = { nounwind readonly } ; applied to the two intrinsic declarations
OpenPOWER on IntegriCloud