[DAGCombine] Fix for shuffle to vector extend for non power 2 vectors

Summary: See https://llvm.org/PR33743 for more details. It seems that for non-power-of-2 vector sizes, the algorithm can produce non-matching sizes for the input and result, causing an assert. This usually isn't a problem, as the isAnyExtend check will weed these out, but in some cases (most often with lots of undefined values for the mask indices) it can pass this check for non-power-of-2 vectors. This change adds an extra check that ensures that the bit sizes of the result and input match (as required). Subscribers: nhaehnle. Differential Revision: https://reviews.llvm.org/D35241. llvm-svn: 315307
author: David Stuttard <david.stuttard@amd.com> 2017-10-10 12:45:45 +0000
committer: David Stuttard <david.stuttard@amd.com> 2017-10-10 12:45:45 +0000
commit: 51c1b2280665589c82329b9d6dfb20600ba42efa (patch)
tree: c5054a0ecca62f7cfe07b44b7bea7a318b6c34f1
parent: 9ba7401a7bf0a823402e71dcf8503d18a7647b00 (diff)
download: bcm5719-llvm-51c1b2280665589c82329b9d6dfb20600ba42efa.tar.gz
bcm5719-llvm-51c1b2280665589c82329b9d6dfb20600ba42efa.zip
2 files changed, 35 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 526c3b8d878..fef1c8f749c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15566,6 +15566,9 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
   // power-of-2 extensions as they are the most likely.
   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
+    // Check for non power of 2 vector sizes
+    if (NumElts % Scale != 0)
+      continue;
     if (!isAnyExtend(Scale))
       continue;
 
diff --git a/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll b/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll
new file mode 100644
index 00000000000..e7e7b9f907c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll
@@ -0,0 +1,32 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; We are only checking that instruction selection can succeed in this case. This
+; cut down test results in no instructions, but that's fine.
+;
+; See https://llvm.org/PR33743 for details of the bug being addressed
+;
+; Checking that shufflevector with 3-vec mask is handled in
+; combineShuffleToVectorExtend
+;
+; GCN: s_endpgm
+
+define amdgpu_ps void @main(i32 %in1) local_unnamed_addr {
+.entry:
+  br i1 undef, label %bb12, label %bb
+
+bb:
+  %__llpc_global_proxy_r5.12.vec.insert = insertelement <4 x i32> undef, i32 %in1, i32 3
+  %tmp3 = shufflevector <4 x i32> %__llpc_global_proxy_r5.12.vec.insert, <4 x i32> undef, <3 x i32> <i32 undef, i32 undef, i32 1>
+  %tmp4 = bitcast <3 x i32> %tmp3 to <3 x float>
+  %a2.i123 = extractelement <3 x float> %tmp4, i32 2
+  %tmp5 = bitcast float %a2.i123 to i32
+  %__llpc_global_proxy_r2.0.vec.insert196 = insertelement <4 x i32> undef, i32 %tmp5, i32 0
+  br label %bb12
+
+bb12:
+  %__llpc_global_proxy_r2.0 = phi <4 x i32> [ %__llpc_global_proxy_r2.0.vec.insert196, %bb ], [ undef, %.entry ]
+  %tmp6 = shufflevector <4 x i32> %__llpc_global_proxy_r2.0, <4 x i32> undef, <3 x i32> <i32 1, i32 2, i32 3>
+  %tmp7 = bitcast <3 x i32> %tmp6 to <3 x float>
+  %a0.i = extractelement <3 x float> %tmp7, i32 0
+  ret void
+}
author	David Stuttard <david.stuttard@amd.com>	2017-10-10 12:45:45 +0000
committer	David Stuttard <david.stuttard@amd.com>	2017-10-10 12:45:45 +0000
commit	51c1b2280665589c82329b9d6dfb20600ba42efa (patch)
tree	c5054a0ecca62f7cfe07b44b7bea7a318b6c34f1
parent	9ba7401a7bf0a823402e71dcf8503d18a7647b00 (diff)
download	bcm5719-llvm-51c1b2280665589c82329b9d6dfb20600ba42efa.tar.gz bcm5719-llvm-51c1b2280665589c82329b9d6dfb20600ba42efa.zip