-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp        |  5
-rw-r--r--  llvm/test/CodeGen/X86/vector-shift-ashr-512.ll | 18
2 files changed, 23 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bf4cc0fb392..157c18bab1e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19233,6 +19233,11 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
     // ashr(R, 7) === cmp_slt(R, 0)
     if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
       SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
+      if (VT.is512BitVector()) {
+        assert(VT == MVT::v64i8 && "Unexpected element type!");
+        SDValue CMP = DAG.getNode(X86ISD::PCMPGTM, dl, MVT::v64i1, Zeros, R);
+        return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
+      }
       return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
     }
 
diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
index 147e58f4710..eb074ec79e4 100644
--- a/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -376,3 +376,21 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
   %shift = ashr <64 x i8> %a, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
   ret <64 x i8> %shift
 }
+
+define <64 x i8> @ashr_const7_v64i8(<64 x i8> %a) {
+; AVX512DQ-LABEL: ashr_const7_v64i8:
+; AVX512DQ: ## BB#0:
+; AVX512DQ-NEXT: vpxor %ymm2, %ymm2, %ymm2
+; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm2, %ymm0
+; AVX512DQ-NEXT: vpcmpgtb %ymm1, %ymm2, %ymm1
+; AVX512DQ-NEXT: retq
+;
+; AVX512BW-LABEL: ashr_const7_v64i8:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vpxord %zmm1, %zmm1, %zmm1
+; AVX512BW-NEXT: vpcmpgtb %zmm0, %zmm1, %k0
+; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
+; AVX512BW-NEXT: retq
+  %res = ashr <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  ret <64 x i8> %res
+}
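
For reference, a minimal standalone sketch of the scalar identity the lowering relies on (not part of the patch, and written in plain C++ rather than SelectionDAG): for an 8-bit lane, an arithmetic shift right by 7 replicates the sign bit across the byte, which is exactly the all-ones/all-zeros mask a signed "0 > x" compare produces, i.e. what PCMPGT emits per lane and what PCMPGTM plus a sign-extend reconstructs from the k-register on AVX-512BW.

// Minimal sketch, assuming the usual arithmetic right shift on signed ints
// (implementation-defined before C++20, but sign-extending on x86 compilers):
// checks ashr(x, 7) == (0 > x ? -1 : 0) for every 8-bit input.
#include <cassert>
#include <cstdint>

int main() {
  for (int v = -128; v <= 127; ++v) {
    int8_t x = static_cast<int8_t>(v);
    // ashr(x, 7): promote to int, shift, truncate back to 8 bits.
    int8_t Ashr7 = static_cast<int8_t>(static_cast<int>(x) >> 7);
    // cmp_slt(x, 0) as a per-lane mask: all-ones when negative, zero otherwise.
    int8_t SltMask = (0 > x) ? int8_t(-1) : int8_t(0);
    assert(Ashr7 == SltMask && "ashr by 7 must equal the sign mask");
  }
  return 0;
}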

