author     Igor Breger <igor.breger@intel.com>    2016-03-16 08:48:26 +0000
committer  Igor Breger <igor.breger@intel.com>    2016-03-16 08:48:26 +0000
commit     0ba7b04f5f050b08452e221ba3a32198685879cd (patch)
tree       d4cfe674a3d294bc9625032b0318bf1dc302bc51
parent     770c627ad084b58a370b8c4adc8b183fb16f1e2e (diff)
AVX512BW: Fix SRA v64i8 lowering. Use PCMPGTM (compare result in a k register) for the 512-bit vector case, because PCMPGT is only supported for 128/256-bit vectors.
Differential Revision: http://reviews.llvm.org/D18204
llvm-svn: 263624
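
As an aside, a minimal C++ sketch (not part of this commit) of the identity the lowering relies on: for an 8-bit lane, an arithmetic shift right by 7 broadcasts the sign bit, so ashr(x, 7) equals cmp_slt(x, 0) with an all-ones result for negative lanes and all-zeros otherwise. This assumes right shift of a negative signed value is arithmetic, which C++20 guarantees and mainstream compilers implement.

    #include <cassert>
    #include <cstdint>

    int main() {
      // Check ashr(x, 7) === cmp_slt(x, 0) over every i8 value.
      for (int i = -128; i <= 127; ++i) {
        int8_t x = static_cast<int8_t>(i);
        // Arithmetic shift right by 7 replicates the sign bit into all 8 bits.
        int8_t shifted = static_cast<int8_t>(x >> 7);
        // PCMPGT(0, x) sets a lane to all-ones when 0 > x, else all-zeros.
        int8_t compared = (0 > x) ? static_cast<int8_t>(-1) : static_cast<int8_t>(0);
        assert(shifted == compared);
      }
      return 0;
    }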
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp        |  5
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-ashr-512.ll | 18
2 files changed, 23 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bf4cc0fb392..157c18bab1e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19233,6 +19233,11 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
     // ashr(R, 7)  === cmp_slt(R, 0)
     if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
       SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
+      if (VT.is512BitVector()) {
+        assert(VT == MVT::v64i8 && "Unexpected element type!");
+        SDValue CMP = DAG.getNode(X86ISD::PCMPGTM, dl, MVT::v64i1, Zeros, R);
+        return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
+      }
       return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
     }

diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index 147e58f4710..eb074ec79e4 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -376,3 +376,21 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
   %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <64 x i8> %shift
 }
+
+define <64 x i8> @ashr_const7_v64i8(<64 x i8> %a) {
+; AVX512DQ-LABEL: ashr_const7_v64i8:
+; AVX512DQ:       ## BB#0:
+; AVX512DQ-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT:    vpcmpgtb %ymm0, %ymm2, %ymm0
+; AVX512DQ-NEXT:    vpcmpgtb %ymm1, %ymm2, %ymm1
+; AVX512DQ-NEXT:    retq
+;
+; AVX512BW-LABEL: ashr_const7_v64i8:
+; AVX512BW:       ## BB#0:
+; AVX512BW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
+; AVX512BW-NEXT:    vpcmpgtb %zmm0, %zmm1, %k0
+; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
+; AVX512BW-NEXT:    retq
+  %res = ashr <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ret <64 x i8> %res
+}