summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNadav Rotem <nadav.rotem@intel.com>2012-01-12 15:31:55 +0000
committerNadav Rotem <nadav.rotem@intel.com>2012-01-12 15:31:55 +0000
commit0a0a829beaf5d1030f56bd3290df593a9f6f4a76 (patch)
tree6c0f69e209f520c9d9e7212b33fd61dab56e8072
parentfd950878fa902a9d5ec6f3accd96bad12ca8979f (diff)
downloadbcm5719-llvm-0a0a829beaf5d1030f56bd3290df593a9f6f4a76.tar.gz
bcm5719-llvm-0a0a829beaf5d1030f56bd3290df593a9f6f4a76.zip
Fix a bug in the AVX 256-bit shuffle code in cases where the splat element is on the boundary of two 128-bit vectors.
The attached testcase was stuck in an endless loop. llvm-svn: 148027
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp2
-rw-r--r--llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll12
2 files changed, 13 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 771ca082506..c643cefb6c0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4413,7 +4413,7 @@ static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
// Extract the 128-bit part containing the splat element and update
// the splat element index when it refers to the higher register.
if (Size == 256) {
- unsigned Idx = (EltNo > NumElems/2) ? NumElems/2 : 0;
+ unsigned Idx = (EltNo >= NumElems/2) ? NumElems/2 : 0;
V1 = Extract128BitVector(V1, DAG.getConstant(Idx, MVT::i32), DAG, dl);
if (Idx > 0)
EltNo -= NumElems/2;
diff --git a/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll b/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
new file mode 100644
index 00000000000..fa8e80f0bde
--- /dev/null
+++ b/llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s
+
+; CHECK: endless_loop
+define void @endless_loop() {
+entry:
+ %0 = load <8 x i32> addrspace(1)* undef, align 32
+ %1 = shufflevector <8 x i32> %0, <8 x i32> undef, <16 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %2 = shufflevector <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef>, <16 x i32> %1, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 17>
+ store <16 x i32> %2, <16 x i32> addrspace(1)* undef, align 64
+ ret void
+; CHECK: ret
+}
OpenPOWER on IntegriCloud