diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-12-07 11:19:00 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-12-07 11:19:00 +0000 |
| commit | d5bc5c16b264b2fa8ddbbf850f3e1f230426ee5b (patch) | |
| tree | 37d1190e3c7cbd8350068135d9d5de4d8bd830e7 /llvm/test/CodeGen | |
| parent | 8cec7eb6dd762850964a6c63fd9187a72d9550c0 (diff) | |
| download | bcm5719-llvm-d5bc5c16b264b2fa8ddbbf850f3e1f230426ee5b.tar.gz bcm5719-llvm-d5bc5c16b264b2fa8ddbbf850f3e1f230426ee5b.zip | |
[X86][XOP] Fix VPERMIL2 non-constant pool shuffle decoding (PR31296)
The non-constant-pool version of DecodeVPERMIL2PMask was not offsetting correctly for the second input. I've updated the code to match the implementation of the constant-pool version.
Annoyingly, this bug stayed hidden for so long because it's tricky to combine shuffles into useful variable shuffle masks that don't become constant-pool entries.
llvm-svn: 288898
Diffstat (limited to 'llvm/test/CodeGen')
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll | 10 |
1 files changed, 8 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll index 41c1866de0d..d7073d6f67a 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll @@ -345,12 +345,18 @@ define <16 x i8> @constant_fold_vpperm() { define <4 x float> @PR31296(i8* %in) { ; X32-LABEL: PR31296: ; X32: # BB#0: # %entry -; X32-NEXT: vmovaps {{.*#+}} xmm0 = [1.000000e+00,0.000000e+00,0.000000e+00,1.000000e+00] +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X32-NEXT: vmovaps {{.*#+}} xmm1 = <0,1,u,u> +; X32-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0,0,1] ; X32-NEXT: retl ; ; X64-LABEL: PR31296: ; X64: # BB#0: # %entry -; X64-NEXT: vmovaps {{.*#+}} xmm0 = [1.000000e+00,0.000000e+00,0.000000e+00,1.000000e+00] +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: vmovq %rax, %xmm0 +; X64-NEXT: vmovaps {{.*#+}} xmm1 = <0,1,u,u> +; X64-NEXT: vpermil2ps {{.*#+}} xmm0 = xmm0[0],xmm1[0,0,1] ; X64-NEXT: retq entry: %0 = getelementptr i8, i8* %in, i32 0 |

