author     Igor Breger <igor.breger@intel.com>  2016-03-16 08:48:26 +0000
committer  Igor Breger <igor.breger@intel.com>  2016-03-16 08:48:26 +0000
commit     0ba7b04f5f050b08452e221ba3a32198685879cd (patch)
tree       d4cfe674a3d294bc9625032b0318bf1dc302bc51
parent     770c627ad084b58a370b8c4adc8b183fb16f1e2e (diff)
AVX512BW: Fix SRA v64i8 lowering. Use PCMPGTM (compare result in a k register) for 512-bit vectors, because PCMPGT is supported only for 128/256-bit vectors.

Differential Revision: http://reviews.llvm.org/D18204

llvm-svn: 263624
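For context (not part of the commit): the lowering relies on the identity that an arithmetic shift right by 7 of an i8 lane broadcasts the sign bit, which is exactly the all-ones/all-zeros result of a signed compare of the lane against zero. A minimal standalone C++ sketch of that identity (illustration only; it assumes the usual arithmetic behavior of >> on signed values):

// Sketch only, not part of the patch: for every 8-bit value, ashr(x, 7)
// equals the sign-extended result of (0 > x), which is what PCMPGT /
// PCMPGTM against a zero vector computes per lane.
#include <cassert>
#include <cstdint>

int main() {
  for (int V = -128; V <= 127; ++V) {
    int8_t X = static_cast<int8_t>(V);
    int8_t ByAshr = static_cast<int8_t>(X >> 7);           // ashr(X, 7); assumes arithmetic shift
    int8_t ByCmp = static_cast<int8_t>((0 > X) ? -1 : 0);  // cmp_sgt(0, X), sign-extended
    assert(ByAshr == ByCmp);
  }
  return 0;
}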
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp          5
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-ashr-512.ll  18
2 files changed, 23 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bf4cc0fb392..157c18bab1e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19233,6 +19233,11 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
     // ashr(R, 7) === cmp_slt(R, 0)
     if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
       SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
+      if (VT.is512BitVector()) {
+        assert(VT == MVT::v64i8 && "Unexpected element type!");
+        SDValue CMP = DAG.getNode(X86ISD::PCMPGTM, dl, MVT::v64i1, Zeros, R);
+        return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
+      }
       return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
     }
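As an aside (not from the patch), the new 512-bit path can be read as: compare every byte lane against zero into a 64-bit mask (PCMPGTM writes one bit per lane into a k register), then sign-extend each mask bit back to a full 0x00/0xFF byte (emitted as VPMOVM2B on AVX512BW). A hedged scalar model of that behavior, with all names invented for illustration:

// Scalar model (illustration only): one compare bit per byte lane is packed
// into a 64-bit "k register", then expanded back to 0x00 / 0xFF per byte.
#include <array>
#include <cstdint>

std::array<int8_t, 64> AshrBy7ViaMask(const std::array<int8_t, 64> &X) {
  uint64_t K = 0;                                    // models the k0 mask register
  for (int I = 0; I < 64; ++I)
    if (0 > X[I])                                    // PCMPGTM zeros, X (per lane)
      K |= (uint64_t{1} << I);

  std::array<int8_t, 64> Out;
  for (int I = 0; I < 64; ++I)                       // VPMOVM2B: mask bit -> byte
    Out[I] = ((K >> I) & 1) ? int8_t(-1) : int8_t(0);
  return Out;
}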
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index 147e58f4710..eb074ec79e4 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -376,3 +376,21 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
%shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
ret <64 x i8> %shift
}
+
+define <64 x i8> @ashr_const7_v64i8(<64 x i8> %a) {
+; AVX512DQ-LABEL: ashr_const7_v64i8:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
+; AVX512DQ-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: ashr_const7_v64i8:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; AVX512BW-NEXT: vpcmpgtb %zmm0, %zmm1, %k0
+; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
+; AVX512BW-NEXT: retq
+ %res = ashr <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+ ret <64 x i8> %res
+}