diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-08-09 12:44:20 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-08-09 12:44:20 +0000 |
| commit | 60394f47b0cc5b7c1dc6c059118c576148578672 (patch) | |
| tree | 4f710e6667e178e553b1c7f99a4b2a8666e4257a | |
| parent | 991834a51616799456a5e65cefde2f41c350aca2 (diff) | |
| download | bcm5719-llvm-60394f47b0cc5b7c1dc6c059118c576148578672.tar.gz bcm5719-llvm-60394f47b0cc5b7c1dc6c059118c576148578672.zip | |
[X86][SSE] Swap X86ISD::BLENDV inputs with an inverted selection mask (PR42825)
As discussed on PR42825, if the selection mask is inverted, i.e. select(~Cond, X, Y), we can just swap the two inputs to get select(Cond, Y, X) and avoid the inversion.
Differential Revision: https://reviews.llvm.org/D65522
llvm-svn: 368438
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll | 25 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/nontemporal-loads.ll | 9 |
3 files changed, 19 insertions, 21 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index fa5f788ff9f..6dd5ec87e7b 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36615,6 +36615,12 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, if (SDValue V = narrowVectorSelect(N, DAG, Subtarget)) return V; + // select(~Cond, X, Y) -> select(Cond, Y, X) + if (CondVT.getScalarType() != MVT::i1) + if (SDValue CondNot = IsNOT(Cond, DAG)) + return DAG.getNode(N->getOpcode(), DL, VT, + DAG.getBitcast(CondVT, CondNot), RHS, LHS); + // Custom action for SELECT MMX if (VT == MVT::x86mmx) { LHS = DAG.getBitcast(MVT::i64, LHS); diff --git a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll index 0774f7fe20d..cc4dee33c61 100644 --- a/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll +++ b/llvm/test/CodeGen/X86/combine-sse41-intrinsics.ll @@ -157,10 +157,9 @@ define <16 x i8> @xor_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { ; CHECK-LABEL: xor_pblendvb: ; CHECK: # %bb.0: ; CHECK-NEXT: movdqa %xmm0, %xmm3 -; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 -; CHECK-NEXT: pxor %xmm2, %xmm0 -; CHECK-NEXT: pblendvb %xmm0, %xmm1, %xmm3 -; CHECK-NEXT: movdqa %xmm3, %xmm0 +; CHECK-NEXT: movaps %xmm2, %xmm0 +; CHECK-NEXT: pblendvb %xmm0, %xmm3, %xmm1 +; CHECK-NEXT: movdqa %xmm1, %xmm0 ; CHECK-NEXT: retq %1 = xor <16 x i8> %a2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> %2 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %1) @@ -170,11 +169,10 @@ define <16 x i8> @xor_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { define <4 x float> @xor_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) { ; CHECK-LABEL: xor_blendvps: ; CHECK: # %bb.0: -; CHECK-NEXT: movdqa %xmm0, %xmm3 -; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 -; CHECK-NEXT: pxor %xmm2, %xmm0 -; CHECK-NEXT: 
blendvps %xmm0, %xmm1, %xmm3 -; CHECK-NEXT: movaps %xmm3, %xmm0 +; CHECK-NEXT: movaps %xmm0, %xmm3 +; CHECK-NEXT: movaps %xmm2, %xmm0 +; CHECK-NEXT: blendvps %xmm0, %xmm3, %xmm1 +; CHECK-NEXT: movaps %xmm1, %xmm0 ; CHECK-NEXT: retq %1 = bitcast <4 x float> %a2 to <4 x i32> %2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1> @@ -186,11 +184,10 @@ define <4 x float> @xor_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> % define <2 x double> @xor_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) { ; CHECK-LABEL: xor_blendvpd: ; CHECK: # %bb.0: -; CHECK-NEXT: movdqa %xmm0, %xmm3 -; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 -; CHECK-NEXT: pxor %xmm2, %xmm0 -; CHECK-NEXT: blendvpd %xmm0, %xmm1, %xmm3 -; CHECK-NEXT: movapd %xmm3, %xmm0 +; CHECK-NEXT: movapd %xmm0, %xmm3 +; CHECK-NEXT: movaps %xmm2, %xmm0 +; CHECK-NEXT: blendvpd %xmm0, %xmm3, %xmm1 +; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: retq %1 = bitcast <2 x double> %a2 to <4 x i32> %2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1> diff --git a/llvm/test/CodeGen/X86/nontemporal-loads.ll b/llvm/test/CodeGen/X86/nontemporal-loads.ll index 8f0118d39bd..8af4a680c77 100644 --- a/llvm/test/CodeGen/X86/nontemporal-loads.ll +++ b/llvm/test/CodeGen/X86/nontemporal-loads.ll @@ -1852,25 +1852,20 @@ define <16 x i32> @test_masked_v16i32(i8 * %addr, <16 x i32> %old, <16 x i32> %m ; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4 ; AVX1-NEXT: vpxor %xmm5, %xmm5, %xmm5 ; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpcmpeqd %xmm6, %xmm6, %xmm6 -; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm4 ; AVX1-NEXT: vpcmpeqd %xmm5, %xmm3, %xmm3 -; AVX1-NEXT: vpxor %xmm6, %xmm3, %xmm3 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 ; AVX1-NEXT: vpcmpeqd %xmm5, %xmm4, %xmm4 -; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm4 ; AVX1-NEXT: vpcmpeqd %xmm5, %xmm2, %xmm2 -; AVX1-NEXT: vpxor %xmm6, %xmm2, %xmm2 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2 ; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm4 ; 
AVX1-NEXT: vmovntdqa 48(%rdi), %xmm5 ; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm4, %ymm4 -; AVX1-NEXT: vblendvps %ymm3, %ymm4, %ymm1, %ymm1 +; AVX1-NEXT: vblendvps %ymm3, %ymm1, %ymm4, %ymm1 ; AVX1-NEXT: vmovntdqa (%rdi), %xmm3 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm4 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm3, %ymm3 -; AVX1-NEXT: vblendvps %ymm2, %ymm3, %ymm0, %ymm0 +; AVX1-NEXT: vblendvps %ymm2, %ymm0, %ymm3, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_masked_v16i32: |

