diff options
author | Craig Topper <craig.topper@intel.com> | 2018-02-28 22:23:55 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-02-28 22:23:55 +0000 |
commit | e31b9d1e5f1af55e4eea4826a91b8a0c5f6ec71d (patch) | |
tree | e64ef9ad598c449b367c82a1350a484c0f03362c | |
parent | 4142369204efa08adfcc70e5b0af476d6403a968 (diff) | |
download | bcm5719-llvm-e31b9d1e5f1af55e4eea4826a91b8a0c5f6ec71d.tar.gz bcm5719-llvm-e31b9d1e5f1af55e4eea4826a91b8a0c5f6ec71d.zip |
[X86] Lower extract_element from k-registers by bitcasting from v16i1 to i16 and extending/truncating.
This is equivalent to what isel was doing anyway, but by canonicalizing earlier, we can remove some patterns.
llvm-svn: 326375
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 41 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-insert-extract.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/pr33349.ll | 24 |
5 files changed, 33 insertions, 41 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 57e50d30af8..8aecfd8fe7d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -14910,36 +14910,35 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); } - // Canonicalize result type to MVT::i32. - if (EltVT != MVT::i32) { - SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, - Vec, Idx); - return DAG.getAnyExtOrTrunc(Extract, dl, EltVT); - } - unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); - // Extracts from element 0 are always allowed. - if (IdxVal == 0) - return Op; - // If the kshift instructions of the correct width aren't natively supported // then we need to promote the vector to the native size to get the correct // zeroing behavior. - if ((!Subtarget.hasDQI() && (VecVT.getVectorNumElements() == 8)) || - (VecVT.getVectorNumElements() < 8)) { + if (VecVT.getVectorNumElements() < 16) { VecVT = MVT::v16i1; - Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT, - DAG.getUNDEF(VecVT), - Vec, + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1, + DAG.getUNDEF(VecVT), Vec, DAG.getIntPtrConstant(0, dl)); } - // Use kshiftr instruction to move to the lower element. - Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec, - DAG.getConstant(IdxVal, dl, MVT::i8)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Vec, - DAG.getIntPtrConstant(0, dl)); + // Extracts from element 0 are always allowed. + if (IdxVal != 0) { + // Use kshiftr instruction to move to the lower element. + Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec, + DAG.getConstant(IdxVal, dl, MVT::i8)); + } + + // Shrink to v16i1 since that's always legal. 
+ if (VecVT.getVectorNumElements() > 16) { + VecVT = MVT::v16i1; + Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Vec, + DAG.getIntPtrConstant(0, dl)); + } + + // Convert to a bitcast+aext/trunc. + MVT CastVT = MVT::getIntegerVT(VecVT.getVectorNumElements()); + return DAG.getAnyExtOrTrunc(DAG.getBitcast(CastVT, Vec), dl, EltVT); } SDValue diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index e8ebbb33509..91e4aca1489 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2859,9 +2859,6 @@ let Predicates = [HasAVX512] in { def : Pat<(maskVT (scalar_to_vector GR32:$src)), (COPY_TO_REGCLASS GR32:$src, maskRC)>; - def : Pat<(i32 (X86kextract maskRC:$src, (iPTR 0))), - (COPY_TO_REGCLASS maskRC:$src, GR32)>; - def : Pat<(maskVT (scalar_to_vector GR8:$src)), (COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>; } diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index aa6fcb19b41..36e8073e9c5 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -457,10 +457,6 @@ def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST", def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>; -def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT", - SDTypeProfile<1, 2, [SDTCisVT<0, i32>, - SDTCVecEltisVT<1, i1>, - SDTCisPtrTy<2>]>>; def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>; diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll index 236985505e1..605d51eaef7 100644 --- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll +++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll @@ -269,7 +269,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) { ; SKX-LABEL: test14: ; SKX: ## %bb.0: ; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, 
%k0 -; SKX-NEXT: kshiftrb $4, %k0, %k0 +; SKX-NEXT: kshiftrw $4, %k0, %k0 ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: testb $1, %al ; SKX-NEXT: cmoveq %rsi, %rdi diff --git a/llvm/test/CodeGen/X86/pr33349.ll b/llvm/test/CodeGen/X86/pr33349.ll index 6ca02a10729..ec9f2381928 100644 --- a/llvm/test/CodeGen/X86/pr33349.ll +++ b/llvm/test/CodeGen/X86/pr33349.ll @@ -12,32 +12,32 @@ target triple = "x86_64-unknown-linux-gnu" ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kshiftrw $1, %k0, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: kshiftrw $2, %k0, %k1 -; KNL-NEXT: kshiftrw $1, %k1, %k2 -; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: testb $1, %al ; KNL-NEXT: fld1 ; KNL-NEXT: fldz ; KNL-NEXT: fld %st(0) ; KNL-NEXT: fcmovne %st(2), %st(0) -; KNL-NEXT: testb $1, %cl +; KNL-NEXT: kshiftrw $2, %k0, %k1 +; KNL-NEXT: kshiftrw $1, %k1, %k2 +; KNL-NEXT: kmovw %k2, %eax +; KNL-NEXT: testb $1, %al ; KNL-NEXT: fld %st(1) ; KNL-NEXT: fcmovne %st(3), %st(0) -; KNL-NEXT: kmovw %k2, %eax +; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: testb $1, %al ; KNL-NEXT: fld %st(2) ; KNL-NEXT: fcmovne %st(4), %st(0) -; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: testb $1, %al ; KNL-NEXT: fxch %st(3) ; KNL-NEXT: fcmovne %st(4), %st(0) ; KNL-NEXT: fstp %st(4) ; KNL-NEXT: fxch %st(3) +; KNL-NEXT: fstpt 20(%rdi) +; KNL-NEXT: fxch %st(1) ; KNL-NEXT: fstpt (%rdi) ; KNL-NEXT: fxch %st(1) ; KNL-NEXT: fstpt 30(%rdi) -; KNL-NEXT: fxch %st(1) -; KNL-NEXT: fstpt 20(%rdi) ; KNL-NEXT: fstpt 10(%rdi) ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq @@ -54,11 +54,11 @@ target triple = "x86_64-unknown-linux-gnu" ; SKX-NEXT: fldz ; SKX-NEXT: fld %st(0) ; SKX-NEXT: fcmovne %st(2), %st(0) -; SKX-NEXT: kshiftrw $1, %k0, %k2 -; SKX-NEXT: kmovd %k2, %eax +; SKX-NEXT: kmovd %k1, %eax ; SKX-NEXT: testb $1, %al ; SKX-NEXT: fld %st(1) ; SKX-NEXT: fcmovne %st(3), %st(0) +; SKX-NEXT: kshiftrw $1, %k0, %k1 ; SKX-NEXT: kmovd %k1, %eax ; SKX-NEXT: testb $1, %al ; SKX-NEXT: fld %st(2) @@ -71,9 +71,9 @@ target triple = 
"x86_64-unknown-linux-gnu" ; SKX-NEXT: fxch %st(3) ; SKX-NEXT: fstpt (%rdi) ; SKX-NEXT: fxch %st(1) -; SKX-NEXT: fstpt 20(%rdi) -; SKX-NEXT: fxch %st(1) ; SKX-NEXT: fstpt 10(%rdi) +; SKX-NEXT: fxch %st(1) +; SKX-NEXT: fstpt 20(%rdi) ; SKX-NEXT: fstpt 30(%rdi) ; SKX-NEXT: retq bb: |