author     Simon Pilgrim <llvm-dev@redking.me.uk>   2016-12-07 11:19:00 +0000
committer  Simon Pilgrim <llvm-dev@redking.me.uk>   2016-12-07 11:19:00 +0000
commit     d5bc5c16b264b2fa8ddbbf850f3e1f230426ee5b
tree       37d1190e3c7cbd8350068135d9d5de4d8bd830e7
parent     8cec7eb6dd762850964a6c63fd9187a72d9550c0
[X86][XOP] Fix VPERMIL2 non-constant pool shuffle decoding (PR31296)
The non-constant-pool version of DecodeVPERMIL2PMask was not offsetting correctly for the second input. I've updated the code to match the implementation in the constant-pool version. Annoyingly, this bug stayed hidden for so long because it's tricky to combine to useful variable shuffle masks that don't become constant-pool entries.

llvm-svn: 288898
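To illustrate the decoding error described above, here is a standalone C++ sketch (not the LLVM source) of the per-element decode for a 128-bit vector of 32-bit floats, contrasting the old and fixed index computation. The selector values in RawMask are made up for the example, and the variable names simply mirror the diff below.

    // Sketch only: selector bits [1:0] pick the element within the lane,
    // bit 2 picks the input operand. Indices taken from the second input
    // must be offset by the element count, not by 1 as the old code did.
    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main() {
      const int NumElts = 4;                        // 4 x f32 in a 128-bit vector
      std::vector<uint64_t> RawMask = {0, 5, 1, 4}; // hypothetical raw selectors

      for (uint64_t Selector : RawMask) {
        int Elt = Selector & 0x3;                   // element within the lane
        int Src = (Selector >> 2) & 0x1;            // 0 = first input, 1 = second
        int Old = Elt + Src;                        // old code: adds only 0 or 1
        int Fixed = Elt + Src * NumElts;            // fixed: scale by element count
        std::cout << "selector " << Selector << " -> old " << Old
                  << ", fixed " << Fixed << '\n';
      }
      return 0;
    }

For selector 5 (element 1 of the second input) the old code produced shuffle index 2, an element of the first input, instead of 5; that mis-decode is what the PR31296 test below guards against.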
-rw-r--r--   llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp          14
-rw-r--r--   llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll   10
2 files changed, 16 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 3c04bf4899f..1be5aec849f 100644
--- a/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -548,10 +548,11 @@ void DecodeVPERMIL2PMask(MVT VT, unsigned M2Z, ArrayRef<uint64_t> RawMask,
   unsigned VecSize = VT.getSizeInBits();
   unsigned EltSize = VT.getScalarSizeInBits();
   unsigned NumLanes = VecSize / 128;
-  unsigned NumEltsPerLane = VT.getVectorNumElements() / NumLanes;
-  assert((VecSize == 128 || VecSize == 256) &&
-         "Unexpected vector size");
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned NumEltsPerLane = NumElts / NumLanes;
+  assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size");
   assert((EltSize == 32 || EltSize == 64) && "Unexpected element size");
+  assert((NumElts == RawMask.size()) && "Unexpected mask size");

   for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
     // VPERMIL2 Operation.
@@ -572,14 +573,15 @@ void DecodeVPERMIL2PMask(MVT VT, unsigned M2Z, ArrayRef<uint64_t> RawMask,
       continue;
     }

-    unsigned Index = i & ~(NumEltsPerLane - 1);
+    int Index = i & ~(NumEltsPerLane - 1);
     if (EltSize == 64)
       Index += (Selector >> 1) & 0x1;
     else
       Index += Selector & 0x3;

-    unsigned SrcOffset = (Selector >> 2) & 1;
-    ShuffleMask.push_back((int)(SrcOffset + Index));
+    int Src = (Selector >> 2) & 0x1;
+    Index += Src * NumElts;
+    ShuffleMask.push_back(Index);
   }
 }
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
index 41c1866de0d..d7073d6f67a 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -345,12 +345,18 @@ define <16 x i8> @constant_fold_vpperm() {
 define <4 x float> @PR31296(i8* %in) {
 ; X32-LABEL: PR31296:
 ; X32:       # BB#0: # %entry
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [1.000000e+00,0.000000e+00,0.000000e+00,1.000000e+00]
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X32-NEXT:    vmovaps {{.*#+}} xmm1 = <0,1,u,u>
+; X32-NEXT:    vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0,0,1]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: PR31296:
 ; X64:       # BB#0: # %entry
-; X64-NEXT:    vmovaps {{.*#+}} xmm0 = [1.000000e+00,0.000000e+00,0.000000e+00,1.000000e+00]
+; X64-NEXT:    movl (%rdi), %eax
+; X64-NEXT:    vmovq %rax, %xmm0
+; X64-NEXT:    vmovaps {{.*#+}} xmm1 = <0,1,u,u>
+; X64-NEXT:    vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0,0,1]
 ; X64-NEXT:    retq
 entry:
   %0 = getelementptr i8, i8* %in, i32 0