summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2019-07-03 14:34:16 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2019-07-03 14:34:16 +0000
commit868d0b7fd99c3db010a9977571d91574e6295500 (patch)
treea4c2a031607303775cf1823f25999ad3db9702c2
parentfa4aac7335ac7ecabbb634d134bd4897783bf62b (diff)
downloadbcm5719-llvm-868d0b7fd99c3db010a9977571d91574e6295500.tar.gz
bcm5719-llvm-868d0b7fd99c3db010a9977571d91574e6295500.zip
[X86][AVX] Combine vpermi(bitcast(x)) -> bitcast(vpermi(x))
iff the number of elements doesn't change. This gets around an issue with combineX86ShuffleChain not being able to hint which domain is preferred for shuffles that can be done with either. Fixes regression introduced in rL365041 llvm-svn: 365044
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp16
-rw-r--r--llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll4
2 files changed, 18 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2912c249283..3e75756cffd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31946,6 +31946,7 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
// Which shuffle domains are permitted?
// Permit domain crossing at higher combine depths.
+ // TODO: Should we indicate which domain is preferred if both are allowed?
bool AllowFloatDomain = FloatDomain || (Depth > 3);
bool AllowIntDomain = (!FloatDomain || (Depth > 3)) && Subtarget.hasSSE2() &&
(!MaskVT.is256BitVector() || Subtarget.hasAVX2());
@@ -33057,6 +33058,21 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
}
return SDValue();
}
+ case X86ISD::VPERMI: {
+ // vpermi(bitcast(x)) -> bitcast(vpermi(x)) for same number of elements.
+ // TODO: Remove when we have preferred domains in combineX86ShuffleChain.
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ if (N0.getOpcode() == ISD::BITCAST &&
+ N0.getOperand(0).getScalarValueSizeInBits() == EltSizeInBits) {
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ SDValue Res = DAG.getNode(X86ISD::VPERMI, DL, SrcVT, Src, N1);
+ return DAG.getBitcast(VT, Res);
+ }
+ return SDValue();
+ }
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
index de581aa3fb9..b3e154b3107 100644
--- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll
@@ -2216,9 +2216,9 @@ define <2 x i64> @test_masked_8xi64_to_2xi64_perm_mask0(<8 x i64> %vec, <2 x i64
define <2 x i64> @test_masked_z_8xi64_to_2xi64_perm_mask0(<8 x i64> %vec, <2 x i64> %mask) {
; CHECK-LABEL: test_masked_z_8xi64_to_2xi64_perm_mask0:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpermq {{.*#+}} zmm0 = zmm0[3,0,2,3,7,4,6,7]
; CHECK-NEXT: vptestnmq %xmm1, %xmm1, %k1
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,2,3,7,4,6,7]
+; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <2 x i32> <i32 3, i32 0>
OpenPOWER on IntegriCloud