summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Craig Topper <craig.topper@intel.com> 2018-02-28 22:23:55 +0000
committer Craig Topper <craig.topper@intel.com> 2018-02-28 22:23:55 +0000
commit e31b9d1e5f1af55e4eea4826a91b8a0c5f6ec71d (patch)
tree e64ef9ad598c449b367c82a1350a484c0f03362c
parent 4142369204efa08adfcc70e5b0af476d6403a968 (diff)
download bcm5719-llvm-e31b9d1e5f1af55e4eea4826a91b8a0c5f6ec71d.tar.gz
bcm5719-llvm-e31b9d1e5f1af55e4eea4826a91b8a0c5f6ec71d.zip
[X86] Lower extract_element from k-registers by bitcasting from v16i1 to i16 and extending/truncating.
This is equivalent to what isel was doing anyway, but by canonicalizing earlier we can remove some patterns.

llvm-svn: 326375
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 41
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td 3
-rw-r--r-- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td 4
-rw-r--r-- llvm/test/CodeGen/X86/avx512-insert-extract.ll 2
-rw-r--r-- llvm/test/CodeGen/X86/pr33349.ll 24
5 files changed, 33 insertions, 41 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 57e50d30af8..8aecfd8fe7d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14910,36 +14910,35 @@ static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
}
- // Canonicalize result type to MVT::i32.
- if (EltVT != MVT::i32) {
- SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- Vec, Idx);
- return DAG.getAnyExtOrTrunc(Extract, dl, EltVT);
- }
-
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- // Extracts from element 0 are always allowed.
- if (IdxVal == 0)
- return Op;
-
// If the kshift instructions of the correct width aren't natively supported
// then we need to promote the vector to the native size to get the correct
// zeroing behavior.
- if ((!Subtarget.hasDQI() && (VecVT.getVectorNumElements() == 8)) ||
- (VecVT.getVectorNumElements() < 8)) {
+ if (VecVT.getVectorNumElements() < 16) {
VecVT = MVT::v16i1;
- Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT,
- DAG.getUNDEF(VecVT),
- Vec,
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
+ DAG.getUNDEF(VecVT), Vec,
DAG.getIntPtrConstant(0, dl));
}
- // Use kshiftr instruction to move to the lower element.
- Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
- DAG.getConstant(IdxVal, dl, MVT::i8));
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Vec,
- DAG.getIntPtrConstant(0, dl));
+ // Extracts from element 0 are always allowed.
+ if (IdxVal != 0) {
+ // Use kshiftr instruction to move to the lower element.
+ Vec = DAG.getNode(X86ISD::KSHIFTR, dl, VecVT, Vec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
+ }
+
+ // Shrink to v16i1 since that's always legal.
+ if (VecVT.getVectorNumElements() > 16) {
+ VecVT = MVT::v16i1;
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VecVT, Vec,
+ DAG.getIntPtrConstant(0, dl));
+ }
+
+ // Convert to a bitcast+aext/trunc.
+ MVT CastVT = MVT::getIntegerVT(VecVT.getVectorNumElements());
+ return DAG.getAnyExtOrTrunc(DAG.getBitcast(CastVT, Vec), dl, EltVT);
}
SDValue
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index e8ebbb33509..91e4aca1489 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2859,9 +2859,6 @@ let Predicates = [HasAVX512] in {
def : Pat<(maskVT (scalar_to_vector GR32:$src)),
(COPY_TO_REGCLASS GR32:$src, maskRC)>;
- def : Pat<(i32 (X86kextract maskRC:$src, (iPTR 0))),
- (COPY_TO_REGCLASS maskRC:$src, GR32)>;
-
def : Pat<(maskVT (scalar_to_vector GR8:$src)),
(COPY_TO_REGCLASS (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit), maskRC)>;
}
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index aa6fcb19b41..36e8073e9c5 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -457,10 +457,6 @@ def X86SubVBroadcast : SDNode<"X86ISD::SUBV_BROADCAST",
def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>;
def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>;
-def X86kextract : SDNode<"ISD::EXTRACT_VECTOR_ELT",
- SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
- SDTCVecEltisVT<1, i1>,
- SDTCisPtrTy<2>]>>;
def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>;
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 236985505e1..605d51eaef7 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -269,7 +269,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64>%b, i64 %a1, i64 %b1) {
; SKX-LABEL: test14:
; SKX: ## %bb.0:
; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
-; SKX-NEXT: kshiftrb $4, %k0, %k0
+; SKX-NEXT: kshiftrw $4, %k0, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: cmoveq %rsi, %rdi
diff --git a/llvm/test/CodeGen/X86/pr33349.ll b/llvm/test/CodeGen/X86/pr33349.ll
index 6ca02a10729..ec9f2381928 100644
--- a/llvm/test/CodeGen/X86/pr33349.ll
+++ b/llvm/test/CodeGen/X86/pr33349.ll
@@ -12,32 +12,32 @@ target triple = "x86_64-unknown-linux-gnu"
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kmovw %k1, %eax
-; KNL-NEXT: kshiftrw $2, %k0, %k1
-; KNL-NEXT: kshiftrw $1, %k1, %k2
-; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: testb $1, %al
; KNL-NEXT: fld1
; KNL-NEXT: fldz
; KNL-NEXT: fld %st(0)
; KNL-NEXT: fcmovne %st(2), %st(0)
-; KNL-NEXT: testb $1, %cl
+; KNL-NEXT: kshiftrw $2, %k0, %k1
+; KNL-NEXT: kshiftrw $1, %k1, %k2
+; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: testb $1, %al
; KNL-NEXT: fld %st(1)
; KNL-NEXT: fcmovne %st(3), %st(0)
-; KNL-NEXT: kmovw %k2, %eax
+; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: testb $1, %al
; KNL-NEXT: fld %st(2)
; KNL-NEXT: fcmovne %st(4), %st(0)
-; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: kmovw %k1, %eax
; KNL-NEXT: testb $1, %al
; KNL-NEXT: fxch %st(3)
; KNL-NEXT: fcmovne %st(4), %st(0)
; KNL-NEXT: fstp %st(4)
; KNL-NEXT: fxch %st(3)
+; KNL-NEXT: fstpt 20(%rdi)
+; KNL-NEXT: fxch %st(1)
; KNL-NEXT: fstpt (%rdi)
; KNL-NEXT: fxch %st(1)
; KNL-NEXT: fstpt 30(%rdi)
-; KNL-NEXT: fxch %st(1)
-; KNL-NEXT: fstpt 20(%rdi)
; KNL-NEXT: fstpt 10(%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
@@ -54,11 +54,11 @@ target triple = "x86_64-unknown-linux-gnu"
; SKX-NEXT: fldz
; SKX-NEXT: fld %st(0)
; SKX-NEXT: fcmovne %st(2), %st(0)
-; SKX-NEXT: kshiftrw $1, %k0, %k2
-; SKX-NEXT: kmovd %k2, %eax
+; SKX-NEXT: kmovd %k1, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: fld %st(1)
; SKX-NEXT: fcmovne %st(3), %st(0)
+; SKX-NEXT: kshiftrw $1, %k0, %k1
; SKX-NEXT: kmovd %k1, %eax
; SKX-NEXT: testb $1, %al
; SKX-NEXT: fld %st(2)
@@ -71,9 +71,9 @@ target triple = "x86_64-unknown-linux-gnu"
; SKX-NEXT: fxch %st(3)
; SKX-NEXT: fstpt (%rdi)
; SKX-NEXT: fxch %st(1)
-; SKX-NEXT: fstpt 20(%rdi)
-; SKX-NEXT: fxch %st(1)
; SKX-NEXT: fstpt 10(%rdi)
+; SKX-NEXT: fxch %st(1)
+; SKX-NEXT: fstpt 20(%rdi)
; SKX-NEXT: fstpt 30(%rdi)
; SKX-NEXT: retq
bb:
OpenPOWER on IntegriCloud