diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 24 | +++++++++++++++++------- |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-blend.ll | 14 | ++++---------- |
2 files changed, 21 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 994c493d2ac..4a87779298e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26470,13 +26470,23 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
   // (add (xor X, M), (and M, 1))
   // And further to:
   // (sub (xor X, M), M)
-  if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
-      ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
-      X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
-    assert(EltBits == 8 || EltBits == 16 || EltBits == 32);
-    return DAG.getBitcast(
-        VT, DAG.getNode(ISD::SUB, DL, MaskVT,
-                        DAG.getNode(ISD::XOR, DL, MaskVT, X, Mask), Mask));
+  if (X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
+    auto IsNegV = [](SDNode *N, SDValue V) {
+      return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&
+             ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());
+    };
+    SDValue V;
+    if (IsNegV(Y.getNode(), X))
+      V = X;
+    else if (IsNegV(X.getNode(), Y))
+      V = Y;
+
+    if (V) {
+      assert(EltBits == 8 || EltBits == 16 || EltBits == 32);
+      return DAG.getBitcast(
+          VT, DAG.getNode(ISD::SUB, DL, MaskVT,
+                          DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask), Mask));
+    }
   }
 
   // PBLENDVB is only available on SSE 4.1.
diff --git a/llvm/test/CodeGen/X86/vector-blend.ll b/llvm/test/CodeGen/X86/vector-blend.ll
index 9cb4a3bab56..6b96f662ded 100644
--- a/llvm/test/CodeGen/X86/vector-blend.ll
+++ b/llvm/test/CodeGen/X86/vector-blend.ll
@@ -1011,11 +1011,8 @@ define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
 ; SSE2-NEXT: psrld $31, %xmm1
 ; SSE2-NEXT: pslld $31, %xmm1
 ; SSE2-NEXT: psrad $31, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: psubd %xmm0, %xmm2
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: pandn %xmm2, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: psubd %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
 ; SSSE3-LABEL: blend_neg_logic_v4i32_2:
@@ -1023,11 +1020,8 @@ define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
 ; SSSE3-NEXT: psrld $31, %xmm1
 ; SSSE3-NEXT: pslld $31, %xmm1
 ; SSSE3-NEXT: psrad $31, %xmm1
-; SSSE3-NEXT: pxor %xmm2, %xmm2
-; SSSE3-NEXT: psubd %xmm0, %xmm2
-; SSSE3-NEXT: pand %xmm1, %xmm0
-; SSSE3-NEXT: pandn %xmm2, %xmm1
-; SSSE3-NEXT: por %xmm1, %xmm0
+; SSSE3-NEXT: pxor %xmm1, %xmm0
+; SSSE3-NEXT: psubd %xmm1, %xmm0
 ; SSSE3-NEXT: retq
 ;
 ; SSE41-LABEL: blend_neg_logic_v4i32_2:

