diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 23 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 29 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-trunc-ext.ll | 29 |
3 files changed, 79 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 64e0db1e70f..f57da0a1d6f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -15119,13 +15119,32 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops);
 }
 
-static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget,
+                                       SelectionDAG &DAG) {
   MVT VT = Op->getSimpleValueType(0);
   SDValue In = Op->getOperand(0);
   MVT InVT = In.getSimpleValueType();
+  MVT VTElt = VT.getVectorElementType();
+  MVT InVTElt = InVT.getVectorElementType();
   SDLoc dl(Op);
 
+  // SKX: BWI/DQI (+VLX below 512 bits) extend an i1 mask with one VPMOVM2*.
+  if ((InVTElt == MVT::i1) &&
+      (((Subtarget->hasBWI() && Subtarget->hasVLX() &&
+        VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() <= 16)) ||
+
+       ((Subtarget->hasBWI() && VT.is512BitVector() &&
+        VTElt.getSizeInBits() <= 16)) ||
+
+       ((Subtarget->hasDQI() && Subtarget->hasVLX() &&
+        VT.getSizeInBits() <= 256 && VTElt.getSizeInBits() >= 32)) ||
+
+       ((Subtarget->hasDQI() && VT.is512BitVector() &&
+        VTElt.getSizeInBits() >= 32))))
+    return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+
   unsigned int NumElts = VT.getVectorNumElements();
+
   if (NumElts != 8 && NumElts != 16)
     return SDValue();
 
@@ -15158,7 +15177,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget *Subtarget,
   SDLoc dl(Op);
 
   if (VT.is512BitVector() || InVT.getVectorElementType() == MVT::i1)
-    return LowerSIGN_EXTEND_AVX512(Op, DAG);
+    return LowerSIGN_EXTEND_AVX512(Op, Subtarget, DAG);
 
   if ((VT != MVT::v4i64 || InVT != MVT::v4i32) &&
       (VT != MVT::v8i32 || InVT != MVT::v8i16) &&
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 10055d065dc..b205de058a6 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -5001,3 +5001,32 @@ def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
 def : Pat<(truncstorei1 GR8:$src, addr:$dst),
           (MOV8mr addr:$dst, GR8:$src)>;
 
+multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
+def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
+                  !strconcat(OpcodeStr##Vec.Suffix, " \t{$src, $dst|$dst, $src}"),
+                  [(set Vec.RC:$dst, (Vec.VT (X86vsext Vec.KRC:$src)))]>, EVEX;
+}
+
+multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo,
+                                 string OpcodeStr, Predicate prd> {
+let Predicates = [prd] in
+  defm Z : cvt_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
+
+  let Predicates = [prd, HasVLX] in {
+    defm Z256 : cvt_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
+    defm Z128 : cvt_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
+  }
+}
+
+multiclass avx512_convert_mask_to_vector<string OpcodeStr> {
+  defm NAME##B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, OpcodeStr,
+                                       HasBWI>;
+  defm NAME##W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, OpcodeStr,
+                                       HasBWI>, VEX_W;
+  defm NAME##D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, OpcodeStr,
+                                       HasDQI>;
+  defm NAME##Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, OpcodeStr,
+                                       HasDQI>, VEX_W;
+}
+
+defm VPMOVM2 : avx512_convert_mask_to_vector<"vpmovm2">;
diff --git a/llvm/test/CodeGen/X86/avx512-trunc-ext.ll b/llvm/test/CodeGen/X86/avx512-trunc-ext.ll
index f1b639e110f..91ef5d58f43 100644
--- a/llvm/test/CodeGen/X86/avx512-trunc-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-trunc-ext.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
 
 ; CHECK-LABEL: trunc_16x32_to_16x8
 ; CHECK: vpmovdb
@@ -118,6 +119,7 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
 
 ; CHECK-LABEL: sext_8i1_8i32
 ; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
+; SKX: vpmovm2d
 ; CHECK: ret
 define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
   %x = icmp slt <8 x i32> %a1, %a2
@@ -145,3 +147,30 @@ define i16 @trunc_i32_to_i1(i32 %a) {
   %res = bitcast <16 x i1> %maskv to i16
   ret i16 %res
 }
+
+; CHECK-LABEL: sext_8i1_8i16
+; SKX: vpmovm2w
+; CHECK: ret
+define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
+  %x = icmp slt <8 x i32> %a1, %a2
+  %y = sext <8 x i1> %x to <8 x i16>
+  ret <8 x i16> %y
+}
+
+; CHECK-LABEL: sext_16i1_16i32
+; SKX: vpmovm2d
+; CHECK: ret
+define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
+  %x = icmp slt <16 x i32> %a1, %a2
+  %y = sext <16 x i1> %x to <16 x i32>
+  ret <16 x i32> %y
+}
+
+; CHECK-LABEL: sext_8i1_8i64
+; SKX: vpmovm2q
+; CHECK: ret
+define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
+  %x = icmp slt <8 x i32> %a1, %a2
+  %y = sext <8 x i1> %x to <8 x i64>
+  ret <8 x i64> %y
+}