summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Kevin B. Smith <kevin.b.smith@intel.com> 2016-04-07 16:15:34 +0000
committer: Kevin B. Smith <kevin.b.smith@intel.com> 2016-04-07 16:15:34 +0000
commit: 3802c4af596d1e708a999ddb3e965e9b60820ad6 (patch)
tree: ebc3685ffe84a8c7f63ee0c635f53086ef5d5f23
parent: 2eb027d21f882e03f0658cddbdfee31efbf08663 (diff)
download: bcm5719-llvm-3802c4af596d1e708a999ddb3e965e9b60820ad6.tar.gz
bcm5719-llvm-3802c4af596d1e708a999ddb3e965e9b60820ad6.zip
[X86]: Fix for PR27251.
Differential Revision: http://reviews.llvm.org/D18850
llvm-svn: 265690
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 21
-rw-r--r-- llvm/test/CodeGen/X86/vector-blend.ll | 6
2 files changed, 22 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 836caa9cb05..aa4cbd44963 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27338,9 +27338,24 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
if (V) {
assert(EltBits == 8 || EltBits == 16 || EltBits == 32);
- return DAG.getBitcast(
- VT, DAG.getNode(ISD::SUB, DL, MaskVT,
- DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask), Mask));
+ SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask);
+ SDValue SubOp2 = Mask;
+
+ // If the negate was on the false side of the select, then
+ // the operands of the SUB need to be swapped. PR 27251.
+ // This is because the pattern being matched above is
+ // (vselect M, (sub (0, X)), X) -> (sub (xor X, M), M)
+ // but if the pattern matched was
+ // (vselect M, X, (sub (0, X))), that is really negation of the pattern
+ // above, -(vselect M, (sub 0, X), X), and therefore the replacement
+ // pattern also needs to be a negation of the replacement pattern above.
+ // And -(sub X, Y) is just sub (Y, X), so swapping the operands of the
+ // sub accomplishes the negation of the replacement pattern.
+ if (V == Y)
+ std::swap(SubOp1, SubOp2);
+
+ return DAG.getBitcast(VT,
+ DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2));
}
}
diff --git a/llvm/test/CodeGen/X86/vector-blend.ll b/llvm/test/CodeGen/X86/vector-blend.ll
index 1de6805cd94..3e00612d430 100644
--- a/llvm/test/CodeGen/X86/vector-blend.ll
+++ b/llvm/test/CodeGen/X86/vector-blend.ll
@@ -1010,7 +1010,8 @@ define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
; SSE2-NEXT: pslld $31, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: pxor %xmm1, %xmm0
-; SSE2-NEXT: psubd %xmm1, %xmm0
+; SSE2-NEXT: psubd %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: blend_neg_logic_v4i32_2:
@@ -1019,7 +1020,8 @@ define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
; SSSE3-NEXT: pslld $31, %xmm1
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: pxor %xmm1, %xmm0
-; SSSE3-NEXT: psubd %xmm1, %xmm0
+; SSSE3-NEXT: psubd %xmm0, %xmm1
+; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: blend_neg_logic_v4i32_2:
OpenPOWER on IntegriCloud