summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 24
-rw-r--r-- llvm/test/CodeGen/X86/vector-blend.ll | 14
2 files changed, 21 insertions, 17 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 994c493d2ac..4a87779298e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26470,13 +26470,23 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
// (add (xor X, M), (and M, 1))
// And further to:
// (sub (xor X, M), M)
- if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
- ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
- X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
- assert(EltBits == 8 || EltBits == 16 || EltBits == 32);
- return DAG.getBitcast(
- VT, DAG.getNode(ISD::SUB, DL, MaskVT,
- DAG.getNode(ISD::XOR, DL, MaskVT, X, Mask), Mask));
+ if (X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
+ auto IsNegV = [](SDNode *N, SDValue V) {
+ return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&
+ ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());
+ };
+ SDValue V;
+ if (IsNegV(Y.getNode(), X))
+ V = X;
+ else if (IsNegV(X.getNode(), Y))
+ V = Y;
+
+ if (V) {
+ assert(EltBits == 8 || EltBits == 16 || EltBits == 32);
+ return DAG.getBitcast(
+ VT, DAG.getNode(ISD::SUB, DL, MaskVT,
+ DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask), Mask));
+ }
}
// PBLENDVB is only available on SSE 4.1.
diff --git a/llvm/test/CodeGen/X86/vector-blend.ll b/llvm/test/CodeGen/X86/vector-blend.ll
index 9cb4a3bab56..6b96f662ded 100644
--- a/llvm/test/CodeGen/X86/vector-blend.ll
+++ b/llvm/test/CodeGen/X86/vector-blend.ll
@@ -1011,11 +1011,8 @@ define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
; SSE2-NEXT: psrld $31, %xmm1
; SSE2-NEXT: pslld $31, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
-; SSE2-NEXT: pxor %xmm2, %xmm2
-; SSE2-NEXT: psubd %xmm0, %xmm2
-; SSE2-NEXT: pand %xmm1, %xmm0
-; SSE2-NEXT: pandn %xmm2, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: psubd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_neg_logic_v4i32_2:
@@ -1023,11 +1020,8 @@ define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
; SSSE3-NEXT: psrld $31, %xmm1
; SSSE3-NEXT: pslld $31, %xmm1
; SSSE3-NEXT: psrad $31, %xmm1
-; SSSE3-NEXT: pxor %xmm2, %xmm2
-; SSSE3-NEXT: psubd %xmm0, %xmm2
-; SSSE3-NEXT: pand %xmm1, %xmm0
-; SSSE3-NEXT: pandn %xmm2, %xmm1
-; SSSE3-NEXT: por %xmm1, %xmm0
+; SSSE3-NEXT: pxor %xmm1, %xmm0
+; SSSE3-NEXT: psubd %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_neg_logic_v4i32_2:
OpenPOWER on IntegriCloud