diff options
| author | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2016-02-16 22:14:03 +0000 |
|---|---|---|
| committer | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2016-02-16 22:14:03 +0000 |
| commit | 132fbf5476e161646a5a8bda6033810b6efb814f (patch) | |
| tree | a7aa4e97a69137915e30fade3a8c4d30e68b9872 /llvm/lib/Target | |
| parent | a87c3480b5da61af9d97a82e888258fdde75b244 (diff) | |
| download | bcm5719-llvm-132fbf5476e161646a5a8bda6033810b6efb814f.tar.gz bcm5719-llvm-132fbf5476e161646a5a8bda6033810b6efb814f.zip | |
[X86] Don't turn (c?-v:v) into (c?-v:0) by blindly using PSIGN.
Currently, we sometimes miscompile this vector pattern:
(c ? -v : v)
We lower it to (because "c" is <4 x i1>, lowered as a vector mask):
(~c & v) | (c & -v)
When we have SSSE3, we incorrectly lower that to PSIGN, which does:
(c < 0 ? -v : c > 0 ? v : 0)
in other words, when c is either all-ones or all-zero:
(c ? -v : 0)
While this is an old bug, it rarely triggers because the PSIGN combine
is too sensitive to operand order. This will be improved separately.
Note that the PSIGN tests are also incorrect. Consider:
%b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
%sub = sub nsw <4 x i32> zeroinitializer, %a
%0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
%1 = and <4 x i32> %a, %0
%2 = and <4 x i32> %b.lobit, %sub
%cond = or <4 x i32> %1, %2
ret <4 x i32> %cond
if %b is zero:
%b.lobit = <4 x i32> zeroinitializer
%sub = sub nsw <4 x i32> zeroinitializer, %a
%0 = <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
%1 = <4 x i32> %a
%2 = <4 x i32> zeroinitializer
%cond = or <4 x i32> %a, zeroinitializer
ret <4 x i32> %a
whereas we currently generate:
psignd %xmm1, %xmm0
retq
which returns 0, as %xmm1 is 0.
Instead, use a pure logic sequence, as described in:
https://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
Fixes PR26110.
Differential Revision: http://reviews.llvm.org/D17181
llvm-svn: 261023
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 33 |
1 file changed, 23 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0eb28c30ea7..994c493d2ac 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26394,7 +26394,7 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
 // As a special case, try to fold:
 // (or (and (m, (sub 0, x)), (pandn m, x)))
 // into:
-// (psign m, x)
+// (sub (xor X, M), M)
 static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
                                             const X86Subtarget &Subtarget) {
   assert(N->getOpcode() == ISD::OR);
@@ -26403,9 +26403,9 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
   SDValue N1 = N->getOperand(1);
   EVT VT = N->getValueType(0);
 
-  if (!((VT == MVT::v2i64 && Subtarget.hasSSSE3()) ||
-        (VT == MVT::v4i64 && Subtarget.hasInt256())))
+  if (!((VT == MVT::v2i64) || (VT == MVT::v4i64 && Subtarget.hasInt256())))
     return SDValue();
+  assert(Subtarget.hasSSE2() && "Unexpected i64 vector without SSE2!");
 
   // Canonicalize pandn to RHS
   if (N0.getOpcode() == X86ISD::ANDNP)
@@ -26454,16 +26454,29 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
 
   SDLoc DL(N);
 
-  // Now we know we at least have a plendvb with the mask val. See if
-  // we can form a psignb/w/d.
-  // psign = x.type == y.type == mask.type && y = sub(0, x);
+  // Try to match:
+  // (or (and (M, (sub 0, X)), (pandn M, X)))
+  // which is a special case of vselect:
+  // (vselect M, (sub 0, X), X)
+  // Per:
+  // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate
+  // We know that, if fNegate is 0 or 1:
+  // (fNegate ? -v : v) == ((v ^ -fNegate) + fNegate)
+  //
+  // Here, we have a mask, M (all 1s or 0), and, similarly, we know that:
+  // ((M & 1) ? -X : X) == ((X ^ -(M & 1)) + (M & 1))
+  // ( M ? -X : X) == ((X ^ M ) + (M & 1))
+  // This lets us transform our vselect to:
+  // (add (xor X, M), (and M, 1))
+  // And further to:
+  // (sub (xor X, M), M)
   if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
       ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
       X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
-    assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
-           "Unsupported VT for PSIGN");
-    Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
-    return DAG.getBitcast(VT, Mask);
+    assert(EltBits == 8 || EltBits == 16 || EltBits == 32);
+    return DAG.getBitcast(
+        VT, DAG.getNode(ISD::SUB, DL, MaskVT,
+                        DAG.getNode(ISD::XOR, DL, MaskVT, X, Mask), Mask));
   }
 
   // PBLENDVB is only available on SSE 4.1.

