summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-11-19 07:22:26 +0000
committerCraig Topper <craig.topper@intel.com>2018-11-19 07:22:26 +0000
commit8b22bcd39fc6db57d87202f4cc80ab9ecb9e1088 (patch)
tree81c5b2e40ab57a16f6119180c121bdf4a4a7c3f7 /llvm
parent209cfbe60eb7cb4b21ef0b039df4662aec1bd76d (diff)
downloadbcm5719-llvm-8b22bcd39fc6db57d87202f4cc80ab9ecb9e1088.tar.gz
bcm5719-llvm-8b22bcd39fc6db57d87202f4cc80ab9ecb9e1088.zip
[X86] Use a pcmpgt with 0 instead of psrad 31 to fill elements with the sign bit in v4i32 MULH lowering.
The shift requires a copy to avoid clobbering a register. Comparing with 0 uses an xor to produce a zero register, which is then overwritten with the compare result. This still requires 2 instructions, but should be one byte shorter since it doesn't need to encode an immediate. llvm-svn: 347185
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp6
-rw-r--r--llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll8
-rw-r--r--llvm/test/CodeGen/X86/vector-idiv.ll22
3 files changed, 18 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index cf7d7a9b0d1..05b565304ae 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23591,11 +23591,11 @@ static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
// If we have a signed multiply but no PMULDQ fix up the result of an
// unsigned multiply.
if (IsSigned && !Subtarget.hasSSE41()) {
- SDValue ShAmt = DAG.getConstant(31, dl, VT);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRA, dl, VT, A, ShAmt), B);
+ DAG.getSetCC(dl, VT, Zero, A, ISD::SETGT), B);
SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRA, dl, VT, B, ShAmt), A);
+ DAG.getSetCC(dl, VT, Zero, B, ISD::SETGT), A);
SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Fixup);
diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
index a1d0508cbb6..07e0ec6b00e 100644
--- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll
@@ -86,8 +86,8 @@ define <4 x i32> @test_div7_4i32(<4 x i32> %a) nounwind {
; SSE2-NEXT: pmuludq %xmm2, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE2-NEXT: movdqa %xmm0, %xmm3
-; SSE2-NEXT: psrad $31, %xmm3
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: paddd %xmm0, %xmm3
; SSE2-NEXT: psubd %xmm3, %xmm1
@@ -386,8 +386,8 @@ define <4 x i32> @test_rem7_4i32(<4 x i32> %a) nounwind {
; SSE2-NEXT: pmuludq %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE2-NEXT: movdqa %xmm0, %xmm3
-; SSE2-NEXT: psrad $31, %xmm3
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
; SSE2-NEXT: pand %xmm1, %xmm3
; SSE2-NEXT: paddd %xmm0, %xmm3
; SSE2-NEXT: psubd %xmm3, %xmm2
diff --git a/llvm/test/CodeGen/X86/vector-idiv.ll b/llvm/test/CodeGen/X86/vector-idiv.ll
index 50090b4f819..33779a9cc78 100644
--- a/llvm/test/CodeGen/X86/vector-idiv.ll
+++ b/llvm/test/CodeGen/X86/vector-idiv.ll
@@ -24,19 +24,19 @@ define <4 x i32> @PR20355(<4 x i32> %a) nounwind {
; SSE2-LABEL: PR20355:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1431655766,1431655766,1431655766,1431655766]
-; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm3
+; SSE2-NEXT: pmuludq %xmm1, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE2-NEXT: pmuludq %xmm1, %xmm3
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE2-NEXT: psrad $31, %xmm0
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: psubd %xmm0, %xmm2
-; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,3,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
+; SSE2-NEXT: pand %xmm1, %xmm3
+; SSE2-NEXT: psubd %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: psrld $31, %xmm0
-; SSE2-NEXT: paddd %xmm2, %xmm0
+; SSE2-NEXT: paddd %xmm4, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: PR20355:
OpenPOWER on IntegriCloud