| field | value | date |
|---|---|---|
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-10-14 19:57:19 +0000 |
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2017-10-14 19:57:19 +0000 |
| commit | 36fe00ee1739cbcc41cc69f66d439f9788408687 (patch) | |
| tree | 2679980685cdb3e1fb033336b3f04c41fc48a5af | |
| parent | 3f49b988e0426bf3ff306203cc30870ea5f63152 (diff) | |
| download | bcm5719-llvm-36fe00ee1739cbcc41cc69f66d439f9788408687.tar.gz, bcm5719-llvm-36fe00ee1739cbcc41cc69f66d439f9788408687.zip | |
[X86][SSE] Don't attempt to reduce the imul vector width of odd sized vectors (PR34947)
llvm-svn: 315825
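For context: reduceVMULWidth interleaves the two multiply operands in element pairs (a punpcklwd-style shuffle), so its mask-building loop only writes `2 * (NumElts / 2)` entries. For an odd element count such as the `<9 x i32>` in PR34947, the last mask slot is never written, which is likely why the combine now declines odd-sized vectors up front. Below is a minimal standalone sketch of that pair-wise mask construction, plain C++ with `std::vector` standing in for LLVM's `SmallVector`; an illustration only, not the LLVM API:

```cpp
// Standalone sketch (not LLVM's APIs) of the punpcklwd-style interleave
// mask built by reduceVMULWidth. The loop writes entries in pairs, so an
// odd NumElts leaves the final slot untouched.
#include <cstdio>
#include <vector>

int main() {
  for (unsigned NumElts : {8u, 9u}) {
    // -1 marks "never written"; the real code's zero-initialized
    // SmallVector would silently keep a stale 0 there instead.
    std::vector<int> ShuffleMask(NumElts, -1);
    for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
      ShuffleMask[2 * i] = i;               // element i of operand 0
      ShuffleMask[2 * i + 1] = i + NumElts; // element i of operand 1
    }
    std::printf("NumElts=%u:", NumElts);
    for (int M : ShuffleMask)
      std::printf(" %d", M);
    std::printf("\n"); // NumElts=9 ends in -1: an incomplete mask
  }
  return 0;
}
```

Returning `SDValue()` simply declines the combine for odd widths, leaving such multiplies to ordinary type legalization, as the scalarized codegen in the new test below shows.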
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 5 |
| -rw-r--r-- | llvm/test/CodeGen/X86/shrink_vmul.ll | 105 |

2 files changed, 109 insertions(+), 1 deletion(-)
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ad633747071..c08d79663fa 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31476,6 +31476,9 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
   SDValue N1 = N->getOperand(1);
   EVT VT = N->getOperand(0).getValueType();
   unsigned NumElts = VT.getVectorNumElements();
+  if ((NumElts % 2) != 0)
+    return SDValue();
+
   unsigned RegSize = 128;
   MVT OpsVT = MVT::getVectorVT(MVT::i16, RegSize / 16);
   EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);
@@ -31502,7 +31505,7 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
   // result.
   // Generate shuffle functioning as punpcklwd.
   SmallVector<int, 16> ShuffleMask(NumElts);
-  for (unsigned i = 0, e = NumElts/ 2; i < e; i++) {
+  for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
     ShuffleMask[2 * i] = i;
     ShuffleMask[2 * i + 1] = i + NumElts;
   }
diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll
index 504c8936442..79cf0f2c8f1 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul.ll
@@ -1349,3 +1349,108 @@ entry:
   store <2 x i32> %tmp13, <2 x i32>* %tmp15, align 4
   ret void
 }
+
+;
+; Illegal Types
+;
+
+define void @PR34947() {
+; X86-LABEL: PR34947:
+; X86:       # BB#0:
+; X86-NEXT:    movdqa (%eax), %xmm0
+; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; X86-NEXT:    movd %xmm1, %ecx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    divl %ecx
+; X86-NEXT:    movd %edx, %xmm1
+; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; X86-NEXT:    movd %xmm2, %ecx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    divl %ecx
+; X86-NEXT:    movd %edx, %xmm2
+; X86-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X86-NEXT:    movd %xmm0, %ecx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    divl %ecx
+; X86-NEXT:    movd %edx, %xmm1
+; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X86-NEXT:    movd %xmm0, %ecx
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    divl %ecx
+; X86-NEXT:    movd %edx, %xmm0
+; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X86-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    divl (%eax)
+; X86-NEXT:    movd %edx, %xmm0
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
+; X86-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X86-NEXT:    pmuludq %xmm2, %xmm1
+; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-NEXT:    pmuludq %xmm2, %xmm3
+; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X86-NEXT:    movl $8199, %eax # imm = 0x2007
+; X86-NEXT:    movd %eax, %xmm2
+; X86-NEXT:    pmuludq %xmm0, %xmm2
+; X86-NEXT:    movd %xmm2, (%eax)
+; X86-NEXT:    movdqa %xmm1, (%eax)
+; X86-NEXT:    retl
+;
+; X64-LABEL: PR34947:
+; X64:       # BB#0:
+; X64-NEXT:    movdqa (%rax), %xmm0
+; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; X64-NEXT:    movd %xmm1, %ecx
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    divl %ecx
+; X64-NEXT:    movd %edx, %xmm1
+; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
+; X64-NEXT:    movd %xmm2, %ecx
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    divl %ecx
+; X64-NEXT:    movd %edx, %xmm2
+; X64-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; X64-NEXT:    movd %xmm0, %ecx
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    divl %ecx
+; X64-NEXT:    movd %edx, %xmm1
+; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X64-NEXT:    movd %xmm0, %ecx
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    divl %ecx
+; X64-NEXT:    movd %edx, %xmm0
+; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    xorl %edx, %edx
+; X64-NEXT:    divl (%rax)
+; X64-NEXT:    movd %edx, %xmm0
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [8199,8199,8199,8199]
+; X64-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X64-NEXT:    pmuludq %xmm2, %xmm1
+; X64-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X64-NEXT:    pmuludq %xmm2, %xmm3
+; X64-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; X64-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; X64-NEXT:    movl $8199, %eax # imm = 0x2007
+; X64-NEXT:    movd %eax, %xmm2
+; X64-NEXT:    pmuludq %xmm0, %xmm2
+; X64-NEXT:    movd %xmm2, (%rax)
+; X64-NEXT:    movdqa %xmm1, (%rax)
+; X64-NEXT:    retq
+  %tmp = load <9 x i32>, <9 x i32>* undef, align 64
+  %rem = urem <9 x i32> zeroinitializer, %tmp
+  %mul = mul <9 x i32> <i32 8199, i32 8199, i32 8199, i32 8199, i32 8199, i32 8199, i32 8199, i32 8199, i32 8199>, %rem
+  store <9 x i32> %mul, <9 x i32>* undef, align 64
+  ret void
+}
```

