diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-11-19 07:22:26 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-11-19 07:22:26 +0000 |
| commit | 8b22bcd39fc6db57d87202f4cc80ab9ecb9e1088 (patch) | |
| tree | 81c5b2e40ab57a16f6119180c121bdf4a4a7c3f7 /llvm | |
| parent | 209cfbe60eb7cb4b21ef0b039df4662aec1bd76d (diff) | |
| download | bcm5719-llvm-8b22bcd39fc6db57d87202f4cc80ab9ecb9e1088.tar.gz bcm5719-llvm-8b22bcd39fc6db57d87202f4cc80ab9ecb9e1088.zip | |
[X86] Use a pcmpgt with 0 instead of psrad 31 to fill elements with the sign bit in v4i32 MULH lowering.
The shift requires a copy to avoid clobbering a register. Comparing with 0 uses an xor to produce a zero that is then overwritten with the compare result. So this still requires 2 instructions, but it should be one byte shorter since it doesn't need to encode an immediate.
llvm-svn: 347185
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-idiv.ll | 22 |
3 files changed, 18 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index cf7d7a9b0d1..05b565304ae 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -23591,11 +23591,11 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, // If we have a signed multiply but no PMULDQ fix up the result of an // unsigned multiply. if (IsSigned && !Subtarget.hasSSE41()) { - SDValue ShAmt = DAG.getConstant(31, dl, VT); + SDValue Zero = DAG.getConstant(0, dl, VT); SDValue T1 = DAG.getNode(ISD::AND, dl, VT, - DAG.getNode(ISD::SRA, dl, VT, A, ShAmt), B); + DAG.getSetCC(dl, VT, Zero, A, ISD::SETGT), B); SDValue T2 = DAG.getNode(ISD::AND, dl, VT, - DAG.getNode(ISD::SRA, dl, VT, B, ShAmt), A); + DAG.getSetCC(dl, VT, Zero, B, ISD::SETGT), A); SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2); Res = DAG.getNode(ISD::SUB, dl, VT, Res, Fixup); diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll index a1d0508cbb6..07e0ec6b00e 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll @@ -86,8 +86,8 @@ define <4 x i32> @test_div7_4i32(<4 x i32> %a) nounwind { ; SSE2-NEXT: pmuludq %xmm2, %xmm3 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: psrad $31, %xmm3 +; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 ; SSE2-NEXT: pand %xmm2, %xmm3 ; SSE2-NEXT: paddd %xmm0, %xmm3 ; SSE2-NEXT: psubd %xmm3, %xmm1 @@ -386,8 +386,8 @@ define <4 x i32> @test_rem7_4i32(<4 x i32> %a) nounwind { ; SSE2-NEXT: pmuludq %xmm1, %xmm3 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: psrad $31, %xmm3 +; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 ; 
SSE2-NEXT: pand %xmm1, %xmm3 ; SSE2-NEXT: paddd %xmm0, %xmm3 ; SSE2-NEXT: psubd %xmm3, %xmm2 diff --git a/llvm/test/CodeGen/X86/vector-idiv.ll b/llvm/test/CodeGen/X86/vector-idiv.ll index 50090b4f819..33779a9cc78 100644 --- a/llvm/test/CodeGen/X86/vector-idiv.ll +++ b/llvm/test/CodeGen/X86/vector-idiv.ll @@ -24,19 +24,19 @@ define <4 x i32> @PR20355(<4 x i32> %a) nounwind { ; SSE2-LABEL: PR20355: ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1431655766,1431655766,1431655766,1431655766] -; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] +; SSE2-NEXT: pxor %xmm3, %xmm3 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 +; SSE2-NEXT: pmuludq %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3] ; SSE2-NEXT: pmuludq %xmm1, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3] -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] -; SSE2-NEXT: pmuludq %xmm1, %xmm3 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3] -; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] -; SSE2-NEXT: psrad $31, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: psubd %xmm0, %xmm2 -; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,3,2,3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1] +; SSE2-NEXT: pand %xmm1, %xmm3 +; SSE2-NEXT: psubd %xmm3, %xmm4 +; SSE2-NEXT: movdqa %xmm4, %xmm0 ; SSE2-NEXT: psrld $31, %xmm0 -; SSE2-NEXT: paddd %xmm2, %xmm0 +; SSE2-NEXT: paddd %xmm4, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: PR20355: |

