summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author Ahmed Bougacha <ahmed.bougacha@gmail.com> 2016-02-16 22:14:03 +0000
committer Ahmed Bougacha <ahmed.bougacha@gmail.com> 2016-02-16 22:14:03 +0000
commit 132fbf5476e161646a5a8bda6033810b6efb814f (patch)
tree a7aa4e97a69137915e30fade3a8c4d30e68b9872 /llvm/lib/Target
parent a87c3480b5da61af9d97a82e888258fdde75b244 (diff)
download bcm5719-llvm-132fbf5476e161646a5a8bda6033810b6efb814f.tar.gz
bcm5719-llvm-132fbf5476e161646a5a8bda6033810b6efb814f.zip
[X86] Don't turn (c?-v:v) into (c?-v:0) by blindly using PSIGN.
Currently, we sometimes miscompile this vector pattern:
    (c ? -v : v)
We lower it to (because "c" is <4 x i1>, lowered as a vector mask):
    (~c & v) | (c & -v)

When we have SSSE3, we incorrectly lower that to PSIGN, which does:
    (c < 0 ? -v : c > 0 ? v : 0)
in other words, when c is either all-ones or all-zero:
    (c ? -v : 0)
While this is an old bug, it rarely triggers because the PSIGN combine
is too sensitive to operand order. This will be improved separately.

Note that the PSIGN tests are also incorrect. Consider:
    %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
    %sub = sub nsw <4 x i32> zeroinitializer, %a
    %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
    %1 = and <4 x i32> %a, %0
    %2 = and <4 x i32> %b.lobit, %sub
    %cond = or <4 x i32> %1, %2
    ret <4 x i32> %cond
if %b is zero:
    %b.lobit = <4 x i32> zeroinitializer
    %sub = sub nsw <4 x i32> zeroinitializer, %a
    %0 = <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
    %1 = <4 x i32> %a
    %2 = <4 x i32> zeroinitializer
    %cond = or <4 x i32> %a, zeroinitializer
    ret <4 x i32> %a
whereas we currently generate:
    psignd %xmm1, %xmm0
    retq
which returns 0, as %xmm1 is 0.

Instead, use a pure logic sequence, as described in:
    https://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate

Fixes PR26110.

Differential Revision: http://reviews.llvm.org/D17181

llvm-svn: 261023
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 33
1 files changed, 23 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0eb28c30ea7..994c493d2ac 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26394,7 +26394,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
// As a special case, try to fold:
// (or (and (m, (sub 0, x)), (pandn m, x)))
// into:
-// (psign m, x)
+// (sub (xor X, M), M)
static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(N->getOpcode() == ISD::OR);
@@ -26403,9 +26403,9 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
- if (!((VT == MVT::v2i64 && Subtarget.hasSSSE3()) ||
- (VT == MVT::v4i64 && Subtarget.hasInt256())))
+ if (!((VT == MVT::v2i64) || (VT == MVT::v4i64 && Subtarget.hasInt256())))
return SDValue();
+ assert(Subtarget.hasSSE2() && "Unexpected i64 vector without SSE2!");
// Canonicalize pandn to RHS
if (N0.getOpcode() == X86ISD::ANDNP)
@@ -26454,16 +26454,29 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
- // Now we know we at least have a plendvb with the mask val. See if
- // we can form a psignb/w/d.
- // psign = x.type == y.type == mask.type && y = sub(0, x);
+ // Try to match:
+ // (or (and (M, (sub 0, X)), (pandn M, X)))
+ // which is a special case of vselect:
+ // (vselect M, (sub 0, X), X)
+ // Per:
+ // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
+ // We know that, if fNegate is 0 or 1:
+ // (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
+ //
+ // Here, we have a mask, M (all 1s or 0), and, similarly, we know that:
+ // ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
+ // ( M ? -X : X) == ((X ^ M ) + (M & 1))
+ // This lets us transform our vselect to:
+ // (add (xor X, M), (and M, 1))
+ // And further to:
+ // (sub (xor X, M), M)
if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
- assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
- "Unsupported VT for PSIGN");
- Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
- return DAG.getBitcast(VT, Mask);
+ assert(EltBits == 8 || EltBits == 16 || EltBits == 32);
+ return DAG.getBitcast(
+ VT, DAG.getNode(ISD::SUB, DL, MaskVT,
+ DAG.getNode(ISD::XOR, DL, MaskVT, X, Mask), Mask));
}
// PBLENDVB is only available on SSE 4.1.
OpenPOWER on IntegriCloud