summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorBenjamin Kramer <benny.kra@googlemail.com>2012-12-25 12:54:19 +0000
committerBenjamin Kramer <benny.kra@googlemail.com>2012-12-25 12:54:19 +0000
commitdf4af41b9b734e9fdf0453ab092013199b4ad4e0 (patch)
treeae50274604fdc41b72ccff62b258f58ab19ae882 /llvm/lib/Target
parentf45d92bb22a999e405b9fdf9c92d12ff6eaed6e4 (diff)
downloadbcm5719-llvm-df4af41b9b734e9fdf0453ab092013199b4ad4e0.tar.gz
bcm5719-llvm-df4af41b9b734e9fdf0453ab092013199b4ad4e0.zip
X86: Custom lower <2 x i64> eq and ne when SSE41 is not available.
pcmpeqd, pshufd, pshufd, pand is cheaper than unpack + cmpq, sbbq, cmpq, sbbq + pack. Small speedup on loop-vectorized viterbi (-march=core2). llvm-svn: 171063
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp26
1 files changed, 24 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5f7f9150977..a173712b5bc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9171,8 +9171,30 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::v2i64) {
if (Opc == X86ISD::PCMPGT && !Subtarget->hasSSE42())
return SDValue();
- if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41())
- return SDValue();
+ if (Opc == X86ISD::PCMPEQ && !Subtarget->hasSSE41()) {
+ // If pcmpeqq is missing but pcmpeqd is available synthesize pcmpeqq with
+ // pcmpeqd + 2 shuffles + pand.
+ assert(Subtarget->hasSSE2() && !FlipSigns && "Don't know how to lower!");
+
+ // First cast everything to the right type,
+ Op0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op0);
+ Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op1);
+
+ // Do the compare.
+ SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);
+
+ // Make sure the lower and upper halves are both all-ones.
+ const int Mask1[] = { 0, 0, 2, 2 };
+ SDValue S1 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask1);
+ const int Mask2[] = { 1, 1, 3, 3 };
+ SDValue S2 = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask2);
+ Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, S1, S2);
+
+ if (Invert)
+ Result = DAG.getNOT(dl, Result, MVT::v4i32);
+
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
}
// Since SSE has no unsigned integer comparisons, we need to flip the sign
OpenPOWER on IntegriCloud