summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-08-09 12:44:20 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-08-09 12:44:20 +0000
commit: 60394f47b0cc5b7c1dc6c059118c576148578672 (patch)
tree: 4f710e6667e178e553b1c7f99a4b2a8666e4257a
parent: 991834a51616799456a5e65cefde2f41c350aca2 (diff)
download: bcm5719-llvm-60394f47b0cc5b7c1dc6c059118c576148578672.tar.gz
download: bcm5719-llvm-60394f47b0cc5b7c1dc6c059118c576148578672.zip
[X86][SSE] Swap X86ISD::BLENDV inputs with an inverted selection mask (PR42825)
As discussed on PR42825, if we are inverting the selection mask we can just swap the inputs and avoid the inversion.

Differential Revision: https://reviews.llvm.org/D65522

llvm-svn: 368438
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp6
-rw-r--r--llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll25
-rw-r--r--llvm/test/CodeGen/X86/nontemporal-loads.ll9
3 files changed, 19 insertions, 21 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fa5f788ff9f..6dd5ec87e7b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36615,6 +36615,12 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
if (SDValue V = narrowVectorSelect(N, DAG, Subtarget))
return V;
+ // select(~Cond, X, Y) -> select(Cond, Y, X)
+ if (CondVT.getScalarType() != MVT::i1)
+ if (SDValue CondNot = IsNOT(Cond, DAG))
+ return DAG.getNode(N->getOpcode(), DL, VT,
+ DAG.getBitcast(CondVT, CondNot), RHS, LHS);
+
// Custom action for SELECT MMX
if (VT == MVT::x86mmx) {
LHS = DAG.getBitcast(MVT::i64, LHS);
diff --git a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
index 0774f7fe20d..cc4dee33c61 100644
--- a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll
@@ -157,10 +157,9 @@ define <16 x i8> @xor_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: xor_pblendvb:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa %xmm0, %xmm3
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm2, %xmm0
-; CHECK-NEXT: pblendvb %xmm0, %xmm1, %xmm3
-; CHECK-NEXT: movdqa %xmm3, %xmm0
+; CHECK-NEXT: movaps %xmm2, %xmm0
+; CHECK-NEXT: pblendvb %xmm0, %xmm3, %xmm1
+; CHECK-NEXT: movdqa %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = xor <16 x i8> %a2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%2 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %1)
@@ -170,11 +169,10 @@ define <16 x i8> @xor_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
define <4 x float> @xor_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: xor_blendvps:
; CHECK: # %bb.0:
-; CHECK-NEXT: movdqa %xmm0, %xmm3
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm2, %xmm0
-; CHECK-NEXT: blendvps %xmm0, %xmm1, %xmm3
-; CHECK-NEXT: movaps %xmm3, %xmm0
+; CHECK-NEXT: movaps %xmm0, %xmm3
+; CHECK-NEXT: movaps %xmm2, %xmm0
+; CHECK-NEXT: blendvps %xmm0, %xmm3, %xmm1
+; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = bitcast <4 x float> %a2 to <4 x i32>
%2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -186,11 +184,10 @@ define <4 x float> @xor_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %
define <2 x double> @xor_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: xor_blendvpd:
; CHECK: # %bb.0:
-; CHECK-NEXT: movdqa %xmm0, %xmm3
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm2, %xmm0
-; CHECK-NEXT: blendvpd %xmm0, %xmm1, %xmm3
-; CHECK-NEXT: movapd %xmm3, %xmm0
+; CHECK-NEXT: movapd %xmm0, %xmm3
+; CHECK-NEXT: movaps %xmm2, %xmm0
+; CHECK-NEXT: blendvpd %xmm0, %xmm3, %xmm1
+; CHECK-NEXT: movapd %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = bitcast <2 x double> %a2 to <4 x i32>
%2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
diff --git a/llvm/test/CodeGen/X86/nontemporal-loads.ll b/llvm/test/CodeGen/X86/nontemporal-loads.ll
index 8f0118d39bd..8af4a680c77 100644
--- a/llvm/test/CodeGen/X86/nontemporal-loads.ll
+++ b/llvm/test/CodeGen/X86/nontemporal-loads.ll
@@ -1852,25 +1852,20 @@ define <16 x i32> @test_masked_v16i32(i8 * %addr, <16 x i32> %old, <16 x i32> %m
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpcmpeqd %xmm6, %xmm6, %xmm6
-; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm3, %xmm3
-; AVX1-NEXT: vpxor %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm4
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm6, %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm4
; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm5
; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm4, %ymm4
-; AVX1-NEXT: vblendvps %ymm3, %ymm4, %ymm1, %ymm1
+; AVX1-NEXT: vblendvps %ymm3, %ymm1, %ymm4, %ymm1
; AVX1-NEXT: vmovntdqa (%rdi), %xmm3
; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm4
; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3
-; AVX1-NEXT: vblendvps %ymm2, %ymm3, %ymm0, %ymm0
+; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm3, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_masked_v16i32:
OpenPOWER on IntegriCloud