Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp    |  9 +++++++++
-rw-r--r--  llvm/test/CodeGen/X86/mmx-arith.ll         | 42 +++++++++------------
-rw-r--r--  llvm/test/CodeGen/X86/shrink_vmul.ll       | 14 ++-----
-rw-r--r--  llvm/test/CodeGen/X86/vector-trunc-math.ll | 55 ++++++++-----------------
4 files changed, 51 insertions(+), 69 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 14e54f0e4e2..a7560b70049 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39689,6 +39689,15 @@ static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
 
+  // Canonicalize constant to RHS.
+  if (DAG.isConstantIntBuildVectorOrConstantInt(LHS) &&
+      !DAG.isConstantIntBuildVectorOrConstantInt(RHS))
+    return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), RHS, LHS);
+
+  // Multiply by zero.
+  if (ISD::isBuildVectorAllZeros(RHS.getNode()))
+    return RHS;
+
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                                         !DCI.isBeforeLegalizeOps());
diff --git a/llvm/test/CodeGen/X86/mmx-arith.ll b/llvm/test/CodeGen/X86/mmx-arith.ll
index c19111b6c80..2d24cb8df35 100644
--- a/llvm/test/CodeGen/X86/mmx-arith.ll
+++ b/llvm/test/CodeGen/X86/mmx-arith.ll
@@ -213,32 +213,29 @@ define void @test1(x86_mmx* %A, x86_mmx* %B) {
 ; X32-NEXT:    movq %xmm0, (%eax)
 ; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; X32-NEXT:    pxor %xmm2, %xmm2
-; X32-NEXT:    pmuludq %xmm1, %xmm2
-; X32-NEXT:    movdqa %xmm1, %xmm3
-; X32-NEXT:    psrlq $32, %xmm3
-; X32-NEXT:    pmuludq %xmm0, %xmm3
-; X32-NEXT:    paddq %xmm2, %xmm3
-; X32-NEXT:    psllq $32, %xmm3
-; X32-NEXT:    pmuludq %xmm1, %xmm0
-; X32-NEXT:    paddq %xmm3, %xmm0
-; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
-; X32-NEXT:    movq %xmm1, (%eax)
-; X32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
-; X32-NEXT:    andps %xmm0, %xmm1
+; X32-NEXT:    movdqa %xmm1, %xmm2
+; X32-NEXT:    pmuludq %xmm0, %xmm2
+; X32-NEXT:    psrlq $32, %xmm1
+; X32-NEXT:    pmuludq %xmm0, %xmm1
+; X32-NEXT:    psllq $32, %xmm1
+; X32-NEXT:    paddq %xmm2, %xmm1
 ; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
 ; X32-NEXT:    movq %xmm0, (%eax)
 ; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; X32-NEXT:    orps %xmm1, %xmm0
+; X32-NEXT:    andps %xmm1, %xmm0
 ; X32-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[0,2,2,3]
 ; X32-NEXT:    movq %xmm1, (%eax)
 ; X32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
 ; X32-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1,1,3]
-; X32-NEXT:    xorps %xmm0, %xmm1
+; X32-NEXT:    orps %xmm0, %xmm1
 ; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
 ; X32-NEXT:    movq %xmm0, (%eax)
+; X32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; X32-NEXT:    xorps %xmm1, %xmm0
+; X32-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X32-NEXT:    movq %xmm0, (%eax)
 ; X32-NEXT:    emms
 ; X32-NEXT:    retl
 ;
@@ -253,15 +250,12 @@ define void @test1(x86_mmx* %A, x86_mmx* %B) {
 ; X64-NEXT:    movq %xmm0, (%rdi)
 ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; X64-NEXT:    pxor %xmm2, %xmm2
-; X64-NEXT:    pmuludq %xmm1, %xmm2
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    psrlq $32, %xmm3
-; X64-NEXT:    pmuludq %xmm0, %xmm3
-; X64-NEXT:    paddq %xmm2, %xmm3
-; X64-NEXT:    psllq $32, %xmm3
+; X64-NEXT:    movdqa %xmm1, %xmm2
+; X64-NEXT:    pmuludq %xmm0, %xmm2
+; X64-NEXT:    psrlq $32, %xmm1
 ; X64-NEXT:    pmuludq %xmm0, %xmm1
-; X64-NEXT:    paddq %xmm3, %xmm1
+; X64-NEXT:    psllq $32, %xmm1
+; X64-NEXT:    paddq %xmm2, %xmm1
 ; X64-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
 ; X64-NEXT:    movq %xmm0, (%rdi)
 ; X64-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll
index 715d1c311c9..aa374f0928e 100644
--- a/llvm/test/CodeGen/X86/shrink_vmul.ll
+++ b/llvm/test/CodeGen/X86/shrink_vmul.ll
@@ -1235,13 +1235,10 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
 ; X86-SSE-NEXT:    pxor %xmm2, %xmm2
 ; X86-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
 ; X86-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
-; X86-SSE-NEXT:    movdqa %xmm1, %xmm3
-; X86-SSE-NEXT:    pmuludq %xmm0, %xmm3
-; X86-SSE-NEXT:    pmuludq %xmm2, %xmm1
+; X86-SSE-NEXT:    pmuludq %xmm0, %xmm1
 ; X86-SSE-NEXT:    pmuludq %xmm0, %xmm2
-; X86-SSE-NEXT:    paddq %xmm1, %xmm2
 ; X86-SSE-NEXT:    psllq $32, %xmm2
-; X86-SSE-NEXT:    paddq %xmm3, %xmm2
+; X86-SSE-NEXT:    paddq %xmm1, %xmm2
 ; X86-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
 ; X86-SSE-NEXT:    movq %xmm0, (%esi,%ecx,4)
 ; X86-SSE-NEXT:    popl %esi
@@ -1279,13 +1276,10 @@ define void @mul_2xi16_sext_zext(i8* nocapture readonly %a, i8* nocapture readon
 ; X64-SSE-NEXT:    pxor %xmm2, %xmm2
 ; X64-SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3]
 ; X64-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,1,3]
-; X64-SSE-NEXT:    movdqa %xmm1, %xmm3
-; X64-SSE-NEXT:    pmuludq %xmm0, %xmm3
-; X64-SSE-NEXT:    pmuludq %xmm2, %xmm1
+; X64-SSE-NEXT:    pmuludq %xmm0, %xmm1
 ; X64-SSE-NEXT:    pmuludq %xmm0, %xmm2
-; X64-SSE-NEXT:    paddq %xmm1, %xmm2
 ; X64-SSE-NEXT:    psllq $32, %xmm2
-; X64-SSE-NEXT:    paddq %xmm3, %xmm2
+; X64-SSE-NEXT:    paddq %xmm1, %xmm2
 ; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[0,2,2,3]
 ; X64-SSE-NEXT:    movq %xmm0, (%rax,%rdx,4)
 ; X64-SSE-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-trunc-math.ll b/llvm/test/CodeGen/X86/vector-trunc-math.ll
index d5202a279c9..a342fd9f1cc 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-math.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-math.ll
@@ -5510,28 +5510,20 @@ define <16 x i8> @trunc_or_const_v16i16_v16i8(<16 x i16> %a0) nounwind {
 define <4 x i32> @mul_add_const_v4i64_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwind {
 ; SSE-LABEL: mul_add_const_v4i64_v4i32:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movdqa %xmm0, %xmm2
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
-; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[2,1,3,3]
-; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[0,1,1,3]
+; SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
+; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3]
+; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[0,1,1,3]
 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
-; SSE-NEXT:    pxor %xmm4, %xmm4
-; SSE-NEXT:    pxor %xmm5, %xmm5
-; SSE-NEXT:    pmuludq %xmm1, %xmm5
-; SSE-NEXT:    movdqa %xmm2, %xmm6
-; SSE-NEXT:    pmuludq %xmm4, %xmm6
-; SSE-NEXT:    paddq %xmm5, %xmm6
-; SSE-NEXT:    psllq $32, %xmm6
-; SSE-NEXT:    pmuludq %xmm1, %xmm2
-; SSE-NEXT:    paddq %xmm6, %xmm2
-; SSE-NEXT:    movdqa %xmm0, %xmm1
-; SSE-NEXT:    pmuludq %xmm4, %xmm1
-; SSE-NEXT:    pmuludq %xmm3, %xmm4
-; SSE-NEXT:    paddq %xmm1, %xmm4
-; SSE-NEXT:    psllq $32, %xmm4
-; SSE-NEXT:    pmuludq %xmm3, %xmm0
-; SSE-NEXT:    paddq %xmm4, %xmm0
-; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
+; SSE-NEXT:    pmuludq %xmm1, %xmm3
+; SSE-NEXT:    pxor %xmm0, %xmm0
+; SSE-NEXT:    pmuludq %xmm0, %xmm1
+; SSE-NEXT:    psllq $32, %xmm1
+; SSE-NEXT:    paddq %xmm3, %xmm1
+; SSE-NEXT:    pmuludq %xmm4, %xmm2
+; SSE-NEXT:    pmuludq %xmm4, %xmm0
+; SSE-NEXT:    psllq $32, %xmm0
+; SSE-NEXT:    paddq %xmm2, %xmm0
+; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
 ; SSE-NEXT:    paddd {{.*}}(%rip), %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -5607,24 +5599,17 @@ define <4 x i32> @mul_add_multiuse_v4i64_v4i32(<4 x i32> %a0, <4 x i32> %a1) nou
 ; SSE-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[2,1,3,3]
 ; SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm1[0,1,1,3]
 ; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
-; SSE-NEXT:    pxor %xmm5, %xmm5
-; SSE-NEXT:    pxor %xmm6, %xmm6
-; SSE-NEXT:    pmuludq %xmm1, %xmm6
-; SSE-NEXT:    movdqa %xmm3, %xmm7
-; SSE-NEXT:    pmuludq %xmm5, %xmm7
-; SSE-NEXT:    paddq %xmm6, %xmm7
-; SSE-NEXT:    psllq $32, %xmm7
 ; SSE-NEXT:    pmuludq %xmm1, %xmm3
-; SSE-NEXT:    paddq %xmm7, %xmm3
-; SSE-NEXT:    movdqa %xmm2, %xmm1
+; SSE-NEXT:    pxor %xmm5, %xmm5
 ; SSE-NEXT:    pmuludq %xmm5, %xmm1
+; SSE-NEXT:    psllq $32, %xmm1
+; SSE-NEXT:    paddq %xmm3, %xmm1
+; SSE-NEXT:    pmuludq %xmm4, %xmm2
 ; SSE-NEXT:    pmuludq %xmm4, %xmm5
-; SSE-NEXT:    paddq %xmm1, %xmm5
 ; SSE-NEXT:    psllq $32, %xmm5
-; SSE-NEXT:    pmuludq %xmm4, %xmm2
-; SSE-NEXT:    paddq %xmm5, %xmm2
-; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
-; SSE-NEXT:    paddd %xmm2, %xmm0
+; SSE-NEXT:    paddq %xmm2, %xmm5
+; SSE-NEXT:    shufps {{.*#+}} xmm5 = xmm5[0,2],xmm1[0,2]
+; SSE-NEXT:    paddd %xmm5, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: mul_add_multiuse_v4i64_v4i32:
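The functional change is the nine added lines in combinePMULDQ: first canonicalize a constant operand to the RHS, so every later fold only has to inspect one side; then fold a multiply by an all-zeros build vector to that zero vector, since x * 0 is 0 lane-wise. That second fold is what deletes the extra pxor/pmuludq/paddq sequences in the test diffs above, where one 32-bit half of a widened multiply was a known-zero register. The sketch below is a toy standalone C++ illustration of the same canonicalize-then-fold shape, not LLVM's SelectionDAG API; Node, isZeroVector, and combineMul are hypothetical names for this example only.

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <utility>
#include <vector>

// Toy stand-in for a DAG node operand: either a "register" value or a
// build-vector of constant lanes.
struct Node {
  bool IsConst = false;    // true for a constant build-vector
  std::vector<long> Elts;  // constant lanes; empty for a register
};

// Plays the role ISD::isBuildVectorAllZeros plays in the patch: a constant
// build-vector whose lanes are all zero.
static bool isZeroVector(const Node &N) {
  return N.IsConst &&
         std::all_of(N.Elts.begin(), N.Elts.end(),
                     [](long E) { return E == 0; });
}

// Canonicalize-then-fold, shaped like the new combinePMULDQ code:
//   1. constant LHS, non-constant RHS -> swap so constants sit on the RHS;
//   2. RHS is all zeros -> the multiply folds to that zero vector.
// Returns the folded result, or nullptr when no simplification applies.
static const Node *combineMul(const Node *LHS, const Node *RHS) {
  if (LHS->IsConst && !RHS->IsConst)
    std::swap(LHS, RHS);  // canonicalize constant to RHS
  if (isZeroVector(*RHS))
    return RHS;           // multiply by zero
  return nullptr;         // no fold
}

int main() {
  Node Reg;                  // non-constant operand
  Node Zeros{true, {0, 0}};  // like a <2 x i64> zeroinitializer
  // Because of the canonicalization, a zero on either side folds.
  assert(combineMul(&Zeros, &Reg) == &Zeros);
  assert(combineMul(&Reg, &Zeros) == &Zeros);
  std::puts("zero operand folds in both positions");
}

The swap is what keeps the zero test to a single check on the RHS; without it, both operand orders would need their own fold, and the same holds for any later constant-RHS simplification in the combine.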

