diff options
| author | Craig Topper <craig.topper@gmail.com> | 2017-02-22 06:54:18 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@gmail.com> | 2017-02-22 06:54:18 +0000 |
| commit | 56d4022997945fc1ea56bb7c439042294d5529db (patch) | |
| tree | e9076735365f9d339e586b7104f7228ad1d9c900 /llvm/lib | |
| parent | 5cd6c5cacff5f2ec87724b7c4079b493bbbe097f (diff) | |
| download | bcm5719-llvm-56d4022997945fc1ea56bb7c439042294d5529db.tar.gz bcm5719-llvm-56d4022997945fc1ea56bb7c439042294d5529db.zip | |
[AVX-512] Allow legacy scalar min/max intrinsics to select EVEX instructions when available
This patch introduces new X86ISD::FMAXS and X86ISD::FMINS opcodes. The legacy intrinsics now lower to these nodes, as do the AVX-512 masked intrinsics when the rounding mode is CUR_DIRECTION.
I've merged a copy of the tablegen multiclass avx512_fp_scalar into avx512_fp_scalar_sae rather than changing avx512_fp_scalar itself, because avx512_fp_scalar still needs to support CUR_DIRECTION appearing as a rounding mode for X86ISD::FADD_ROUND and others.
Differential revision: https://reviews.llvm.org/D30186
llvm-svn: 295810
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 56 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFormats.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 37 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 20 |
7 files changed, 85 insertions, 45 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c03d6aff5a8..faabe37d4fa 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -18991,6 +18991,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget SDValue Src2 = Op.getOperand(2); SDValue passThru = Op.getOperand(3); SDValue Mask = Op.getOperand(4); + unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; + if (IntrWithRoundingModeOpcode != 0) { + SDValue Rnd = Op.getOperand(5); + if (!isRoundModeCurDirection(Rnd)) + return getScalarMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode, + dl, VT, Src1, Src2, Rnd), + Mask, passThru, Subtarget, DAG); + } return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2), Mask, passThru, Subtarget, DAG); } @@ -23910,8 +23918,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::ABS: return "X86ISD::ABS"; case X86ISD::CONFLICT: return "X86ISD::CONFLICT"; case X86ISD::FMAX: return "X86ISD::FMAX"; + case X86ISD::FMAXS: return "X86ISD::FMAXS"; case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND"; case X86ISD::FMIN: return "X86ISD::FMIN"; + case X86ISD::FMINS: return "X86ISD::FMINS"; case X86ISD::FMIN_RND: return "X86ISD::FMIN_RND"; case X86ISD::FMAXC: return "X86ISD::FMAXC"; case X86ISD::FMINC: return "X86ISD::FMINC"; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index e783d9e1e92..27668b28e5a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -251,6 +251,9 @@ namespace llvm { /// Commutative FMIN and FMAX. FMAXC, FMINC, + /// Scalar intrinsic floating point max and min. + FMAXS, FMINS, + /// Floating point reciprocal-sqrt and reciprocal approximation. /// Note that these typically require refinement /// in order to obtain suitable precision. 
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 527e86d94b9..ba220061d34 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4195,13 +4195,43 @@ multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo EVEX_B, EVEX_RC; } multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, - SDNode VecNode, OpndItins itins, bit IsCommutable> { - let ExeDomain = _.ExeDomain in + SDNode OpNode, SDNode VecNode, SDNode SaeNode, + OpndItins itins, bit IsCommutable> { + let ExeDomain = _.ExeDomain in { + defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2), OpcodeStr, + "$src2, $src1", "$src1, $src2", + (_.VT (VecNode _.RC:$src1, _.RC:$src2)), + itins.rr>; + + defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, + "$src2, $src1", "$src1, $src2", + (_.VT (VecNode _.RC:$src1, + _.ScalarIntMemCPat:$src2)), + itins.rm>; + + let isCodeGenOnly = 1, Predicates = [HasAVX512] in { + def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), + (ins _.FRC:$src1, _.FRC:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))], + itins.rr> { + let isCommutable = IsCommutable; + } + def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), + (ins _.FRC:$src1, _.ScalarMemOp:$src2), + OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set _.FRC:$dst, (OpNode _.FRC:$src1, + (_.ScalarLdFrag addr:$src2)))], itins.rm>; + } + defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "{sae}, $src2, $src1", "$src1, $src2, {sae}", - (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), + (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 FROUND_NO_EXC))>, EVEX_B; + } } multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode, 
@@ -4220,25 +4250,23 @@ multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode, } multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode VecNode, + SDNode VecNode, SDNode SaeNode, SizeItins itins, bit IsCommutable> { - defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode, - itins.s, IsCommutable>, - avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, VecNode, - itins.s, IsCommutable>, + defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode, + VecNode, SaeNode, itins.s, IsCommutable>, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; - defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode, - itins.d, IsCommutable>, - avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, VecNode, - itins.d, IsCommutable>, + defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode, + VecNode, SaeNode, itins.d, IsCommutable>, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnd, SSE_ALU_ITINS_S, 1>; defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_MUL_ITINS_S, 1>; defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnd, SSE_ALU_ITINS_S, 0>; defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_DIV_ITINS_S, 0>; -defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fminRnd, SSE_ALU_ITINS_S, 0>; -defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxRnd, SSE_ALU_ITINS_S, 0>; +defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnd, + SSE_ALU_ITINS_S, 0>; +defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnd, + SSE_ALU_ITINS_S, 0>; // MIN/MAX nodes are commutable under "unsafe-fp-math". 
In this case we use // X86fminc and X86fmaxc instead of X86fmin and X86fmax diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td index 824e4df5716..c2fe786732d 100644 --- a/llvm/lib/Target/X86/X86InstrFormats.td +++ b/llvm/lib/Target/X86/X86InstrFormats.td @@ -455,7 +455,7 @@ class SI_Int<bits<8> o, Format F, dag outs, dag ins, string asm, Domain d = GenericDomain> : I<o, F, outs, ins, asm, pattern, itin, d> { let Predicates = !if(!eq(OpEnc.Value, EncEVEX.Value), [HasAVX512], - !if(!eq(OpEnc.Value, EncVEX.Value), [HasAVX], + !if(!eq(OpEnc.Value, EncVEX.Value), [UseAVX], !if(!eq(OpPrefix.Value, XS.Value), [UseSSE1], !if(!eq(OpPrefix.Value, XD.Value), [UseSSE2], !if(!eq(OpPrefix.Value, PD.Value), [UseSSE2], diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 089e450b57c..3a60f486f53 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -42,6 +42,8 @@ def SDTX86CmpTestSae : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; +def X86fmins : SDNode<"X86ISD::FMINS", SDTFPBinOp>; +def X86fmaxs : SDNode<"X86ISD::FMAXS", SDTFPBinOp>; // Commutative and Associative FMIN and FMAX. 
def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 2417cdc1904..918c1845aa3 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -259,8 +259,8 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, /// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, - SDPatternOperator Int, RegisterClass RC, - string asm, Operand memopr, + SDPatternOperator OpNode, RegisterClass RC, + ValueType VT, string asm, Operand memopr, ComplexPattern mem_cpat, Domain d, OpndItins itins, bit Is2Addr = 1> { let isCodeGenOnly = 1, hasSideEffects = 0 in { @@ -268,14 +268,14 @@ let isCodeGenOnly = 1, hasSideEffects = 0 in { !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr, d>, + [(set RC:$dst, (VT (OpNode RC:$src1, RC:$src2)))], itins.rr, d>, Sched<[itins.Sched]>; let mayLoad = 1 in def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2), !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), - [(set RC:$dst, (Int RC:$src1, mem_cpat:$src2))], itins.rm, d>, + [(set RC:$dst, (VT (OpNode RC:$src1, mem_cpat:$src2)))], itins.rm, d>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } } @@ -3047,21 +3047,20 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, } multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, - SDPatternOperator IntSS, - SDPatternOperator IntSD, + SDPatternOperator OpNode, SizeItins itins> { - defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, IntSS, VR128, + defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32, !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32, 
SSEPackedSingle, itins.s, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG; - defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, IntSD, VR128, + defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64, !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64, SSEPackedDouble, itins.d, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG; let Constraints = "$src1 = $dst" in { - defm SS : sse12_fp_scalar_int<opc, OpcodeStr, IntSS, VR128, + defm SS : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v4f32, !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32, SSEPackedSingle, itins.s>, XS; - defm SD : sse12_fp_scalar_int<opc, OpcodeStr, IntSD, VR128, + defm SD : sse12_fp_scalar_int<opc, OpcodeStr, OpNode, VR128, v2f64, !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64, SSEPackedDouble, itins.d>, XD; } @@ -3070,29 +3069,23 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, // Binary Arithmetic instructions defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_s_int<0x58, "add", null_frag, null_frag, - SSE_ALU_ITINS_S>; + basic_sse12_fp_binop_s_int<0x58, "add", null_frag, SSE_ALU_ITINS_S>; defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>, basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>, - basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, null_frag, - SSE_MUL_ITINS_S>; + basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, SSE_MUL_ITINS_S>; let isCommutable = 0 in { defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, null_frag, - SSE_ALU_ITINS_S>; + basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag,SSE_ALU_ITINS_S>; defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>, basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>, - basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, null_frag, - 
SSE_DIV_ITINS_S>; + basic_sse12_fp_binop_s_int<0x5E, "div", null_frag,SSE_DIV_ITINS_S>; defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_s_int<0x5F, "max", int_x86_sse_max_ss, - int_x86_sse2_max_sd, SSE_ALU_ITINS_S>; + basic_sse12_fp_binop_s_int<0x5F, "max", X86fmaxs, SSE_ALU_ITINS_S>; defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>, basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>, - basic_sse12_fp_binop_s_int<0x5D, "min", int_x86_sse_min_ss, - int_x86_sse2_min_sd, SSE_ALU_ITINS_S>; + basic_sse12_fp_binop_s_int<0x5D, "min", X86fmins, SSE_ALU_ITINS_S>; } let isCodeGenOnly = 1 in { diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 775e7a7dd18..151bda487c1 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -811,18 +811,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::FMAX_RND), X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX, X86ISD::FMAX_RND), - X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::FMAX_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::FMAX_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK, + X86ISD::FMAXS, X86ISD::FMAX_RND), + X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK, + X86ISD::FMAXS, X86ISD::FMAX_RND), X86_INTRINSIC_DATA(avx512_mask_min_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN, X86ISD::FMIN_RND), X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN, X86ISD::FMIN_RND), - X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::FMIN_RND, 0), - X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::FMIN_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_min_sd_round, 
INTR_TYPE_SCALAR_MASK, + X86ISD::FMINS, X86ISD::FMIN_RND), + X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK, + X86ISD::FMINS, X86ISD::FMIN_RND), X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL, X86ISD::FMUL_RND), X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL, @@ -1604,7 +1604,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse_comilt_ss, COMI, X86ISD::COMI, ISD::SETLT), X86_INTRINSIC_DATA(sse_comineq_ss, COMI, X86ISD::COMI, ISD::SETNE), X86_INTRINSIC_DATA(sse_max_ps, INTR_TYPE_2OP, X86ISD::FMAX, 0), + X86_INTRINSIC_DATA(sse_max_ss, INTR_TYPE_2OP, X86ISD::FMAXS, 0), X86_INTRINSIC_DATA(sse_min_ps, INTR_TYPE_2OP, X86ISD::FMIN, 0), + X86_INTRINSIC_DATA(sse_min_ss, INTR_TYPE_2OP, X86ISD::FMINS, 0), X86_INTRINSIC_DATA(sse_movmsk_ps, INTR_TYPE_1OP, X86ISD::MOVMSK, 0), X86_INTRINSIC_DATA(sse_rcp_ps, INTR_TYPE_1OP, X86ISD::FRCP, 0), X86_INTRINSIC_DATA(sse_rsqrt_ps, INTR_TYPE_1OP, X86ISD::FRSQRT, 0), @@ -1627,7 +1629,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0), X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, ISD::FP_TO_SINT, 0), X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0), + X86_INTRINSIC_DATA(sse2_max_sd, INTR_TYPE_2OP, X86ISD::FMAXS, 0), X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0), + X86_INTRINSIC_DATA(sse2_min_sd, INTR_TYPE_2OP, X86ISD::FMINS, 0), X86_INTRINSIC_DATA(sse2_movmsk_pd, INTR_TYPE_1OP, X86ISD::MOVMSK, 0), X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0), X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0), |

