[X86] Don't turn (c?-v:v) into (c?-v:0) by blindly using PSIGN.

Currently, we sometimes miscompile this vector pattern:
    (c ? -v : v)
We lower it to (because "c" is <4 x i1>, lowered as a vector mask):
    (~c & v) | (c & -v)

When we have SSSE3, we incorrectly lower that to PSIGN, which does:
    (c < 0 ? -v : c > 0 ? v : 0)
in other words, when c is either all-ones or all-zero:
    (c ? -v : 0)
While this is an old bug, it rarely triggers because the PSIGN combine
is too sensitive to operand order. This will be improved separately.

Note that the PSIGN tests are also incorrect. Consider:
    %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
    %sub = sub nsw <4 x i32> zeroinitializer, %a
    %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
    %1 = and <4 x i32> %a, %0
    %2 = and <4 x i32> %b.lobit, %sub
    %cond = or <4 x i32> %1, %2
    ret <4 x i32> %cond
if %b is zero:
    %b.lobit = <4 x i32> zeroinitializer
    %sub = sub nsw <4 x i32> zeroinitializer, %a
    %0 = <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
    %1 = <4 x i32> %a
    %2 = <4 x i32> zeroinitializer
    %cond = or <4 x i32> %a, zeroinitializer
    ret <4 x i32> %a
whereas we currently generate:
    psignd %xmm1, %xmm0
    retq
which returns 0, as %xmm1 is 0.

Instead, use a pure logic sequence, as described in:
    https://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate

Fixes PR26110.

Differential Revision: http://reviews.llvm.org/D17181

llvm-svn: 261023
author: Ahmed Bougacha <ahmed.bougacha@gmail.com> 2016-02-16 22:14:03 +0000
committer: Ahmed Bougacha <ahmed.bougacha@gmail.com> 2016-02-16 22:14:03 +0000
commit: 132fbf5476e161646a5a8bda6033810b6efb814f (patch)
tree: a7aa4e97a69137915e30fade3a8c4d30e68b9872 /llvm/lib/Target
parent: a87c3480b5da61af9d97a82e888258fdde75b244 (diff)
download: bcm5719-llvm-132fbf5476e161646a5a8bda6033810b6efb814f.tar.gz
bcm5719-llvm-132fbf5476e161646a5a8bda6033810b6efb814f.zip
1 files changed, 23 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0eb28c30ea7..994c493d2ac 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26394,7 +26394,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
 // As a special case, try to fold:
 //   (or (and (m, (sub 0, x)), (pandn m, x)))
 // into:
-//   (psign m, x)
+//   (sub (xor X, M), M)
 static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
                                             const X86Subtarget &Subtarget) {
   assert(N->getOpcode() == ISD::OR);
@@ -26403,9 +26403,9 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
   SDValue N1 = N->getOperand(1);
   EVT VT = N->getValueType(0);
 
-  if (!((VT == MVT::v2i64 && Subtarget.hasSSSE3()) ||
-        (VT == MVT::v4i64 && Subtarget.hasInt256())))
+  if (!((VT == MVT::v2i64) || (VT == MVT::v4i64 && Subtarget.hasInt256())))
     return SDValue();
+  assert(Subtarget.hasSSE2() && "Unexpected i64 vector without SSE2!");
 
   // Canonicalize pandn to RHS
   if (N0.getOpcode() == X86ISD::ANDNP)
@@ -26454,16 +26454,29 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
 
   SDLoc DL(N);
 
-  // Now we know we at least have a plendvb with the mask val.  See if
-  // we can form a psignb/w/d.
-  // psign = x.type == y.type == mask.type && y = sub(0, x);
+  // Try to match:
+  //   (or (and (M, (sub 0, X)), (pandn M, X)))
+  // which is a special case of vselect:
+  //   (vselect M, (sub 0, X), X)
+  // Per:
+  // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
+  // We know that, if fNegate is 0 or 1:
+  //   (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
+  //
+  // Here, we have a mask, M (all 1s or 0), and, similarly, we know that:
+  //   ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
+  //   ( M      ? -X : X) == ((X ^   M     ) + (M & 1))
+  // This lets us transform our vselect to:
+  //   (add (xor X, M), (and M, 1))
+  // And further to:
+  //   (sub (xor X, M), M)
   if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
       ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
       X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
-    assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
-           "Unsupported VT for PSIGN");
-    Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
-    return DAG.getBitcast(VT, Mask);
+    assert(EltBits == 8 || EltBits == 16 || EltBits == 32);
+    return DAG.getBitcast(
+        VT, DAG.getNode(ISD::SUB, DL, MaskVT,
+                        DAG.getNode(ISD::XOR, DL, MaskVT, X, Mask), Mask));
   }
 
   // PBLENDVB is only available on SSE 4.1.
author	Ahmed Bougacha <ahmed.bougacha@gmail.com>	2016-02-16 22:14:03 +0000
committer	Ahmed Bougacha <ahmed.bougacha@gmail.com>	2016-02-16 22:14:03 +0000
commit	132fbf5476e161646a5a8bda6033810b6efb814f (patch)
tree	a7aa4e97a69137915e30fade3a8c4d30e68b9872 /llvm/lib/Target
parent	a87c3480b5da61af9d97a82e888258fdde75b244 (diff)
download	bcm5719-llvm-132fbf5476e161646a5a8bda6033810b6efb814f.tar.gz bcm5719-llvm-132fbf5476e161646a5a8bda6033810b6efb814f.zip