diff options
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 31 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 34 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 6 | 
5 files changed, 32 insertions, 45 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0ceb61c0e06..2d7c89d48e5 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -21888,6 +21888,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {    case X86ISD::AND:                return "X86ISD::AND";    case X86ISD::BEXTR:              return "X86ISD::BEXTR";    case X86ISD::MUL_IMM:            return "X86ISD::MUL_IMM"; +  case X86ISD::MOVMSK:             return "X86ISD::MOVMSK";    case X86ISD::PTEST:              return "X86ISD::PTEST";    case X86ISD::TESTP:              return "X86ISD::TESTP";    case X86ISD::TESTM:              return "X86ISD::TESTM"; @@ -24018,33 +24019,9 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,    case X86ISD::SETCC:      KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);      break; -  case ISD::INTRINSIC_WO_CHAIN: { -    unsigned IntId = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); -    unsigned NumLoBits = 0; -    switch (IntId) { -    default: break; -    case Intrinsic::x86_sse_movmsk_ps: -    case Intrinsic::x86_avx_movmsk_ps_256: -    case Intrinsic::x86_sse2_movmsk_pd: -    case Intrinsic::x86_avx_movmsk_pd_256: -    case Intrinsic::x86_mmx_pmovmskb: -    case Intrinsic::x86_sse2_pmovmskb_128: -    case Intrinsic::x86_avx2_pmovmskb: { -      // High bits of movmskp{s|d}, pmovmskb are known zero. -      switch (IntId) { -        default: llvm_unreachable("Impossible intrinsic");  // Can't reach here. -        case Intrinsic::x86_sse_movmsk_ps:      NumLoBits = 4; break; -        case Intrinsic::x86_avx_movmsk_ps_256:  NumLoBits = 8; break; -        case Intrinsic::x86_sse2_movmsk_pd:     NumLoBits = 2; break; -        case Intrinsic::x86_avx_movmsk_pd_256:  NumLoBits = 4; break; -        case Intrinsic::x86_mmx_pmovmskb:       NumLoBits = 8; break; -        case Intrinsic::x86_sse2_pmovmskb_128:  NumLoBits = 16; break; -        case Intrinsic::x86_avx2_pmovmskb:      NumLoBits = 32; break; -      } -      KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits); -      break; -    } -    } +  case X86ISD::MOVMSK: { +    unsigned NumLoBits = Op.getOperand(0).getValueType().getVectorNumElements(); +    KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits);      break;    }    } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 89d52410aae..664eadd000a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -352,6 +352,9 @@ namespace llvm {        // X86-specific multiply by immediate.        MUL_IMM, +      // Vector sign bit extraction. +      MOVMSK, +        // Vector bitwise comparisons.        PTEST, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index a5adfd35f0c..2c75ad3fbf4 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -276,6 +276,9 @@ def X86ktest   : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>;  def X86testm   : SDNode<"X86ISD::TESTM", SDTX86Testm, [SDNPCommutative]>;  def X86testnm  : SDNode<"X86ISD::TESTNM", SDTX86Testm, [SDNPCommutative]>; +def X86movmsk : SDNode<"X86ISD::MOVMSK", +                        SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVec<1>]>>; +  def X86select  : SDNode<"X86ISD::SELECT"     , SDTSelect>;  def X86pmuludq : SDNode<"X86ISD::PMULUDQ", diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 70bcc2b2242..43289d97ca0 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2765,25 +2765,23 @@ let Predicates = [HasAVX1Only] in {  //===----------------------------------------------------------------------===//  /// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave -multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm, -                                Domain d> { +multiclass sse12_extr_sign_mask<RegisterClass RC, ValueType vt, +                                string asm, Domain d> {    def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),                !strconcat(asm, "\t{$src, $dst|$dst, $src}"), -              [(set GR32orGR64:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>, +              [(set GR32orGR64:$dst, (X86movmsk (vt RC:$src)))], IIC_SSE_MOVMSK, d>,                Sched<[WriteVecLogic]>;  }  let Predicates = [HasAVX] in { -  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, -                                        "movmskps", SSEPackedSingle>, PS, VEX; -  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, -                                        "movmskpd", SSEPackedDouble>, PD, VEX; -  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256, -                                        "movmskps", SSEPackedSingle>, PS, -                                        VEX, VEX_L; -  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256, -                                        "movmskpd", SSEPackedDouble>, PD, -                                        VEX, VEX_L; +  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps", +                                        SSEPackedSingle>, PS, VEX; +  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd", +                                        SSEPackedDouble>, PD, VEX; +  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps", +                                         SSEPackedSingle>, PS, VEX, VEX_L; +  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd", +                                         SSEPackedDouble>, PD, VEX, VEX_L;    def : Pat<(i32 (X86fgetsign FR32:$src)),              (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>; @@ -2797,9 +2795,9 @@ let Predicates = [HasAVX] in {               (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>;  } -defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps", +defm MOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",                                       SSEPackedSingle>, PS; -defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", +defm MOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",                                       SSEPackedDouble>, PD;  def : Pat<(i32 (X86fgetsign FR32:$src)), @@ -4665,20 +4663,20 @@ let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in {  def VPMOVMSKBrr  : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),             (ins VR128:$src),             "pmovmskb\t{$src, $dst|$dst, $src}", -           [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], +           [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))],             IIC_SSE_MOVMSK>, VEX;  let Predicates = [HasAVX2] in {  def VPMOVMSKBYrr  : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),             (ins VR256:$src),             "pmovmskb\t{$src, $dst|$dst, $src}", -           [(set GR32orGR64:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, +           [(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))]>,             VEX, VEX_L;  }  def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src),             "pmovmskb\t{$src, $dst|$dst, $src}", -           [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], +           [(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))],             IIC_SSE_MOVMSK>;  } // ExeDomain = SSEPackedInt diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 1c8ec14a37f..d1e26256558 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -321,6 +321,8 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {    X86_INTRINSIC_DATA(avx_max_ps_256,    INTR_TYPE_2OP, X86ISD::FMAX, 0),    X86_INTRINSIC_DATA(avx_min_pd_256,    INTR_TYPE_2OP, X86ISD::FMIN, 0),    X86_INTRINSIC_DATA(avx_min_ps_256,    INTR_TYPE_2OP, X86ISD::FMIN, 0), +  X86_INTRINSIC_DATA(avx_movmsk_pd_256, INTR_TYPE_1OP, X86ISD::MOVMSK, 0), +  X86_INTRINSIC_DATA(avx_movmsk_ps_256, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),    X86_INTRINSIC_DATA(avx_rcp_ps_256,    INTR_TYPE_1OP, X86ISD::FRCP, 0),    X86_INTRINSIC_DATA(avx_rsqrt_ps_256,  INTR_TYPE_1OP, X86ISD::FRSQRT, 0),    X86_INTRINSIC_DATA(avx_sqrt_pd_256,   INTR_TYPE_1OP, ISD::FSQRT, 0), @@ -354,6 +356,7 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {    X86_INTRINSIC_DATA(avx2_pminu_b, INTR_TYPE_2OP, ISD::UMIN, 0),    X86_INTRINSIC_DATA(avx2_pminu_d, INTR_TYPE_2OP, ISD::UMIN, 0),    X86_INTRINSIC_DATA(avx2_pminu_w, INTR_TYPE_2OP, ISD::UMIN, 0), +  X86_INTRINSIC_DATA(avx2_pmovmskb, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),    X86_INTRINSIC_DATA(avx2_pmovsxbd, INTR_TYPE_1OP, X86ISD::VSEXT, 0),    X86_INTRINSIC_DATA(avx2_pmovsxbq, INTR_TYPE_1OP, X86ISD::VSEXT, 0),    X86_INTRINSIC_DATA(avx2_pmovsxbw, INTR_TYPE_1OP, X86ISD::VSEXT, 0), @@ -2184,6 +2187,7 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {    X86_INTRINSIC_DATA(sse_comineq_ss,    COMI, X86ISD::COMI, ISD::SETNE),    X86_INTRINSIC_DATA(sse_max_ps,        INTR_TYPE_2OP, X86ISD::FMAX, 0),    X86_INTRINSIC_DATA(sse_min_ps,        INTR_TYPE_2OP, X86ISD::FMIN, 0), +  X86_INTRINSIC_DATA(sse_movmsk_ps,     INTR_TYPE_1OP, X86ISD::MOVMSK, 0),    X86_INTRINSIC_DATA(sse_rcp_ps,        INTR_TYPE_1OP, X86ISD::FRCP, 0),    X86_INTRINSIC_DATA(sse_rsqrt_ps,      INTR_TYPE_1OP, X86ISD::FRSQRT, 0),    X86_INTRINSIC_DATA(sse_sqrt_ps,       INTR_TYPE_1OP, ISD::FSQRT, 0), @@ -2201,6 +2205,7 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {    X86_INTRINSIC_DATA(sse2_comineq_sd,   COMI, X86ISD::COMI, ISD::SETNE),    X86_INTRINSIC_DATA(sse2_max_pd,       INTR_TYPE_2OP, X86ISD::FMAX, 0),    X86_INTRINSIC_DATA(sse2_min_pd,       INTR_TYPE_2OP, X86ISD::FMIN, 0), +  X86_INTRINSIC_DATA(sse2_movmsk_pd,    INTR_TYPE_1OP, X86ISD::MOVMSK, 0),    X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),    X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),    X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0), @@ -2210,6 +2215,7 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {    X86_INTRINSIC_DATA(sse2_pmaxu_b,      INTR_TYPE_2OP, ISD::UMAX, 0),    X86_INTRINSIC_DATA(sse2_pmins_w,      INTR_TYPE_2OP, ISD::SMIN, 0),    X86_INTRINSIC_DATA(sse2_pminu_b,      INTR_TYPE_2OP, ISD::UMIN, 0), +  X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),    X86_INTRINSIC_DATA(sse2_pmulh_w,      INTR_TYPE_2OP, ISD::MULHS, 0),    X86_INTRINSIC_DATA(sse2_pmulhu_w,     INTR_TYPE_2OP, ISD::MULHU, 0),    X86_INTRINSIC_DATA(sse2_pmulu_dq,     INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),  | 

