summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bill Schmidt <wschmidt@linux.vnet.ibm.com> 2015-07-29 14:31:57 +0000
committer: Bill Schmidt <wschmidt@linux.vnet.ibm.com> 2015-07-29 14:31:57 +0000
commit: 42ddd71120e445748a8d992e12297560590b3ca4 (patch)
tree: c717d9c23359ab5809316b934ccad037c08e608f
parent: 085da7ecae9f00338f95bd60f38be5f3de58733a (diff)
downloadbcm5719-llvm-42ddd71120e445748a8d992e12297560590b3ca4.tar.gz
bcm5719-llvm-42ddd71120e445748a8d992e12297560590b3ca4.zip
[PPC] Fix PR24216: Don't generate splat for misaligned shuffle mask
Given certain shuffle-vector masks, LLVM emits splat instructions which splat the wrong bytes from the source register. The issue is that the function PPC::isSplatShuffleMask() in PPCISelLowering.cpp does not ensure that the splat pattern found is requesting bytes that are aligned on an EltSize boundary. This patch detects this situation as not a valid splat mask, resulting in a permute being generated instead of a splat.

Patch and test case by Tyler Kenney, cleaned up a bit by me.

This is a simple bug fix that would be good to incorporate into 3.7.

llvm-svn: 243519
-rw-r--r-- llvm/lib/Target/PowerPC/PPCISelLowering.cpp 5
-rw-r--r-- llvm/test/CodeGen/PowerPC/pr24216.ll 14
2 files changed, 19 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index e7bc9030e98..94171a6cc47 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1430,6 +1430,11 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
assert(N->getValueType(0) == MVT::v16i8 &&
(EltSize == 1 || EltSize == 2 || EltSize == 4));
+ // The consecutive indices need to specify an element, not part of two
+ // different elements. So abandon ship early if this isn't the case.
+ if (N->getMaskElt(0) % EltSize != 0)
+ return false;
+
// This is a splat operation if each element of the permute is the same, and
// if the value doesn't reference the second vector.
unsigned ElementBase = N->getMaskElt(0);
diff --git a/llvm/test/CodeGen/PowerPC/pr24216.ll b/llvm/test/CodeGen/PowerPC/pr24216.ll
new file mode 100644
index 00000000000..4ab41985f5b
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr24216.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+; Test case adapted from PR24216.
+
+define void @foo(<16 x i8>* nocapture readonly %in, <16 x i8>* nocapture %out) {
+entry:
+ %0 = load <16 x i8>, <16 x i8>* %in, align 16
+ %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 2, i32 3, i32 4, i32 5, i32 2, i32 3, i32 4, i32 5, i32 2, i32 3, i32 4, i32 5>
+ store <16 x i8> %1, <16 x i8>* %out, align 16
+ ret void
+}
+
+; CHECK: vperm
+; CHECK-NOT: vspltw
OpenPOWER on IntegriCloud