diff options
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 178 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 120 |
2 files changed, 157 insertions, 141 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index e13258db419..5c871fd1910 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2830,7 +2830,8 @@ multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr, sched, HasBWI>, VEX, PS, VEX_W; } -defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, WriteVecLogic>; +// TODO - do we need a X86SchedWriteWidths::KMASK type? +defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SchedWriteVecLogic.XMM>; // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit let Predicates = [HasAVX512, NoDQI] in @@ -2876,12 +2877,13 @@ def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>; def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>; def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>; -defm KAND : avx512_mask_binop_all<0x41, "kand", and, WriteVecLogic, 1>; -defm KOR : avx512_mask_binop_all<0x45, "kor", or, WriteVecLogic, 1>; -defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, WriteVecLogic, 1>; -defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, WriteVecLogic, 1>; -defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, WriteVecLogic, 0>; -defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, WriteVecLogic, 1, HasDQI>; +// TODO - do we need a X86SchedWriteWidths::KMASK type? +defm KAND : avx512_mask_binop_all<0x41, "kand", and, SchedWriteVecLogic.XMM, 1>; +defm KOR : avx512_mask_binop_all<0x45, "kor", or, SchedWriteVecLogic.XMM, 1>; +defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SchedWriteVecLogic.XMM, 1>; +defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SchedWriteVecLogic.XMM, 1>; +defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SchedWriteVecLogic.XMM, 0>; +defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>; multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode, Instruction Inst> { @@ -2960,8 +2962,9 @@ multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode, VEX, PD, VEX_W; } -defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, WriteVecLogic>; -defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, WriteVecLogic, HasDQI>; +// TODO - do we need a X86SchedWriteWidths::KMASK type? +defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SchedWriteVecLogic.XMM>; +defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SchedWriteVecLogic.XMM, HasDQI>; // Mask shift multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC, @@ -3718,7 +3721,7 @@ let hasSideEffects = 0 in def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst), (ins VR128X:$src), "vmovq.s\t{$src, $dst|$dst, $src}", []>, - EVEX, VEX_W, Sched<[WriteVecLogic]>; + EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>; } // ExeDomain = SSEPackedInt // Move Scalar Single to Double Int @@ -4149,7 +4152,7 @@ let Predicates = [HasAVX512] in { (VMOVSDZrr VR128X:$src1, VR128X:$src2)>; } -let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in { +let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in { let AddedComplexity = 15 in def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src), @@ -4763,23 +4766,23 @@ multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr, multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, - SDNode OpNodeMsk, X86FoldableSchedWrite sched, + SDNode OpNodeMsk, X86SchedWriteWidths sched, AVX512VLVectorVTInfo VTInfo, bit IsCommutable = 0> { let Predicates = [HasAVX512] in - defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched, + defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.ZMM, VTInfo.info512, IsCommutable>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched, + defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.YMM, VTInfo.info256, IsCommutable>, EVEX_V256; - defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched, + defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched.XMM, VTInfo.info128, IsCommutable>, EVEX_V128; } } multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr, - SDNode OpNode, X86FoldableSchedWrite sched, + SDNode OpNode, X86SchedWriteWidths sched, bit IsCommutable = 0> { defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, sched, avx512vl_i64_info, IsCommutable>, @@ -4789,10 +4792,14 @@ multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr, EVEX_CD8<32, CD8VF>; } -defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, WriteVecLogic, 1>; -defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, WriteVecLogic, 1>; -defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, WriteVecLogic, 1>; -defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, WriteVecLogic>; +defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, + SchedWriteVecLogic, 1>; +defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, + SchedWriteVecLogic, 1>; +defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, + SchedWriteVecLogic, 1>; +defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, + SchedWriteVecLogic>; //===----------------------------------------------------------------------===// // AVX-512 FP arithmetic @@ -5007,83 +5014,86 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, } multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, - Predicate prd, X86FoldableSchedWrite sched, - X86FoldableSchedWrite schedY, + Predicate prd, X86SchedWriteWidths sched, bit IsCommutable = 0> { let Predicates = [prd] in { defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info, - schedY, IsCommutable>, EVEX_V512, PS, + sched.ZMM, IsCommutable>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info, - schedY, IsCommutable>, EVEX_V512, PD, VEX_W, + sched.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; } // Define only if AVX512VL feature is present. let Predicates = [prd, HasVLX] in { defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info, - sched, IsCommutable>, EVEX_V128, PS, + sched.XMM, IsCommutable>, EVEX_V128, PS, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info, - schedY, IsCommutable>, EVEX_V256, PS, + sched.YMM, IsCommutable>, EVEX_V256, PS, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info, - sched, IsCommutable>, EVEX_V128, PD, VEX_W, + sched.XMM, IsCommutable>, EVEX_V128, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info, - schedY, IsCommutable>, EVEX_V256, PD, VEX_W, + sched.YMM, IsCommutable>, EVEX_V256, PD, VEX_W, EVEX_CD8<64, CD8VF>; } } multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, - X86FoldableSchedWrite sched> { - defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched, v16f32_info>, - EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched, v8f64_info>, - EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; + X86SchedWriteWidths sched> { + defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM, + v16f32_info>, + EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM, + v8f64_info>, + EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, - X86FoldableSchedWrite sched> { - defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched, v16f32_info>, - EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched, v8f64_info>, - EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; + X86SchedWriteWidths sched> { + defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM, + v16f32_info>, + EVEX_V512, PS, EVEX_CD8<32, CD8VF>; + defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.ZMM, + v8f64_info>, + EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512, - WriteFAdd, WriteFAdd, 1>, - avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, WriteFAdd>; + SchedWriteFAdd, 1>, + avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SchedWriteFAdd>; defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, - WriteFMul, WriteFMul, 1>, - avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, WriteFMul>; + SchedWriteFMul, 1>, + avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SchedWriteFMul>; defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, - WriteFAdd, WriteFAdd>, - avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, WriteFAdd>; + SchedWriteFAdd>, + avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SchedWriteFAdd>; defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, - WriteFDiv, WriteFDiv>, - avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, WriteFDiv>; + SchedWriteFDiv>, + avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SchedWriteFDiv>; defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, - WriteFCmp, WriteFCmp, 0>, - avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFCmp>; + SchedWriteFCmp, 0>, + avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SchedWriteFCmp>; defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, - WriteFCmp, WriteFCmp, 0>, - avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFCmp>; + SchedWriteFCmp, 0>, + avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SchedWriteFCmp>; let isCodeGenOnly = 1 in { defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, - WriteFCmp, WriteFCmp, 1>; + SchedWriteFCmp, 1>; defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, - WriteFCmp, WriteFCmp, 1>; + SchedWriteFCmp, 1>; } defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, - WriteFLogic, WriteFLogicY, 1>; + SchedWriteFLogic, 1>; defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, - WriteFLogic, WriteFLogicY, 0>; + SchedWriteFLogic, 0>; defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, - WriteFLogic, WriteFLogicY, 1>; + SchedWriteFLogic, 1>; defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, - WriteFLogic, WriteFLogicY, 1>; + SchedWriteFLogic, 1>; // Patterns catch floating point selects with bitcasted integer logic ops. multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode, @@ -5355,17 +5365,17 @@ multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo, } multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode, - X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _, + X86SchedWriteWidths sched, AVX512VLVectorVTInfo _, string Suffix> { let Predicates = [HasAVX512] in - defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched, _.info512, Suffix>, - avx512_vptest_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512; + defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched.ZMM, _.info512, Suffix>, + avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.ZMM, _.info512>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched, _.info256, Suffix>, - avx512_vptest_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; - defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched, _.info128, Suffix>, - avx512_vptest_mb<opc, OpcodeStr, OpNode, sched, _.info128>, EVEX_V128; + defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched.YMM, _.info256, Suffix>, + avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.YMM, _.info256>, EVEX_V256; + defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched.XMM, _.info128, Suffix>, + avx512_vptest_mb<opc, OpcodeStr, OpNode, sched.XMM, _.info128>, EVEX_V128; } let Predicates = [HasAVX512, NoVLX] in { defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, Suffix>; @@ -5374,7 +5384,7 @@ multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode, } multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode, - X86FoldableSchedWrite sched> { + X86SchedWriteWidths sched> { defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched, avx512vl_i32_info, "D">; defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched, @@ -5382,23 +5392,23 @@ multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode, } multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr, - PatFrag OpNode, X86FoldableSchedWrite sched> { + PatFrag OpNode, X86SchedWriteWidths sched> { let Predicates = [HasBWI] in { - defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched, v32i16_info, "W">, - EVEX_V512, VEX_W; - defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched, v64i8_info, "B">, - EVEX_V512; + defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.ZMM, + v32i16_info, "W">, EVEX_V512, VEX_W; + defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.ZMM, + v64i8_info, "B">, EVEX_V512; } let Predicates = [HasVLX, HasBWI] in { - defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched, v16i16x_info, "W">, - EVEX_V256, VEX_W; - defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched, v8i16x_info, "W">, - EVEX_V128, VEX_W; - defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched, v32i8x_info, "B">, - EVEX_V256; - defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched, v16i8x_info, "B">, - EVEX_V128; + defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.YMM, + v16i16x_info, "W">, EVEX_V256, VEX_W; + defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched.XMM, + v8i16x_info, "W">, EVEX_V128, VEX_W; + defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.YMM, + v32i8x_info, "B">, EVEX_V256; + defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched.XMM, + v16i8x_info, "B">, EVEX_V128; } let Predicates = [HasAVX512, NoVLX] in { @@ -5418,19 +5428,19 @@ def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2), (X86cmpm node:$src1, node:$src2, (i8 4))>; multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr, - PatFrag OpNode, X86FoldableSchedWrite sched> : - avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, sched>, + PatFrag OpNode, X86SchedWriteWidths sched> : + avx512_vptest_wb<opc_wb, OpcodeStr, OpNode, sched>, avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>; defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem, - WriteVecLogic>, T8PD; + SchedWriteVecLogic>, T8PD; defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm, - WriteVecLogic>, T8XS; - + SchedWriteVecLogic>, T8XS; //===----------------------------------------------------------------------===// // AVX-512 Shift instructions //===----------------------------------------------------------------------===// + multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { @@ -9886,9 +9896,9 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$sr //===----------------------------------------------------------------------===// defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512, - WriteFShuffle, WriteFShuffle>; + SchedWriteFShuffle>; defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512, - WriteFShuffle, WriteFShuffle>; + SchedWriteFShuffle>; defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl, WriteShuffle, HasBWI>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index e99f806478b..51e89558739 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2296,32 +2296,33 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode, ValueType OpVT128, ValueType OpVT256, - X86FoldableSchedWrite sched, bit IsCommutable, + X86SchedWriteWidths sched, bit IsCommutable, Predicate prd> { let Predicates = [HasAVX, prd] in defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128, - VR128, loadv2i64, i128mem, sched, IsCommutable, 0>, VEX_4V, VEX_WIG; + VR128, loadv2i64, i128mem, sched.XMM, + IsCommutable, 0>, VEX_4V, VEX_WIG; let Constraints = "$src1 = $dst" in defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128, - memopv2i64, i128mem, sched, IsCommutable, 1>; + memopv2i64, i128mem, sched.XMM, IsCommutable, 1>; let Predicates = [HasAVX2, prd] in defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, - OpVT256, VR256, loadv4i64, i256mem, sched, + OpVT256, VR256, loadv4i64, i256mem, sched.YMM, IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG; } // These are ordered here for pattern ordering requirements with the fp versions defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, - WriteVecLogic, 1, NoVLX>; + SchedWriteVecLogic, 1, NoVLX>; defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, - WriteVecLogic, 1, NoVLX>; + SchedWriteVecLogic, 1, NoVLX>; defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, - WriteVecLogic, 1, NoVLX>; + SchedWriteVecLogic, 1, NoVLX>; defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64, - WriteVecLogic, 0, NoVLX>; + SchedWriteVecLogic, 0, NoVLX>; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Logical Instructions @@ -3305,57 +3306,57 @@ multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, } // ExeDomain = SSEPackedInt defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8, - WriteVecALU, 1, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16, - WriteVecALU, 1, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32, - WriteVecALU, 1, NoVLX>; + SchedWriteVecALU, 1, NoVLX>; defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64, - WriteVecALU, 1, NoVLX>; + SchedWriteVecALU, 1, NoVLX>; defm PADDSB : PDI_binop_all<0xEC, "paddsb", X86adds, v16i8, v32i8, - WriteVecALU, 1, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PADDSW : PDI_binop_all<0xED, "paddsw", X86adds, v8i16, v16i16, - WriteVecALU, 1, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PADDUSB : PDI_binop_all<0xDC, "paddusb", X86addus, v16i8, v32i8, - WriteVecALU, 1, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PADDUSW : PDI_binop_all<0xDD, "paddusw", X86addus, v8i16, v16i16, - WriteVecALU, 1, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16, - WriteVecIMul, 1, NoVLX_Or_NoBWI>; + SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>; defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16, - WriteVecIMul, 1, NoVLX_Or_NoBWI>; + SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>; defm PMULHW : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16, - WriteVecIMul, 1, NoVLX_Or_NoBWI>; + SchedWriteVecIMul, 1, NoVLX_Or_NoBWI>; defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8, - WriteVecALU, 0, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16, - WriteVecALU, 0, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32, - WriteVecALU, 0, NoVLX>; + SchedWriteVecALU, 0, NoVLX>; defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64, - WriteVecALU, 0, NoVLX>; + SchedWriteVecALU, 0, NoVLX>; defm PSUBSB : PDI_binop_all<0xE8, "psubsb", X86subs, v16i8, v32i8, - WriteVecALU, 0, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; defm PSUBSW : PDI_binop_all<0xE9, "psubsw", X86subs, v8i16, v16i16, - WriteVecALU, 0, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8, - WriteVecALU, 0, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16, - WriteVecALU, 0, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 0, NoVLX_Or_NoBWI>; defm PMINUB : PDI_binop_all<0xDA, "pminub", umin, v16i8, v32i8, - WriteVecALU, 1, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PMINSW : PDI_binop_all<0xEA, "pminsw", smin, v8i16, v16i16, - WriteVecALU, 1, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", umax, v16i8, v32i8, - WriteVecALU, 1, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", smax, v8i16, v16i16, - WriteVecALU, 1, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8, - WriteVecALU, 1, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16, - WriteVecALU, 1, NoVLX_Or_NoBWI>; + SchedWriteVecALU, 1, NoVLX_Or_NoBWI>; defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64, - WriteVecIMul, 1, NoVLX>; + SchedWriteVecIMul, 1, NoVLX>; let Predicates = [HasAVX, NoVLX_Or_NoBWI] in defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128, @@ -3493,17 +3494,17 @@ let ExeDomain = SSEPackedInt in { //===---------------------------------------------------------------------===// defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8, - WriteVecALU, 1, TruePredicate>; + SchedWriteVecALU, 1, TruePredicate>; defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16, - WriteVecALU, 1, TruePredicate>; + SchedWriteVecALU, 1, TruePredicate>; defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32, - WriteVecALU, 1, TruePredicate>; + SchedWriteVecALU, 1, TruePredicate>; defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8, - WriteVecALU, 0, TruePredicate>; + SchedWriteVecALU, 0, TruePredicate>; defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, - WriteVecALU, 0, TruePredicate>; + SchedWriteVecALU, 0, TruePredicate>; defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, - WriteVecALU, 0, TruePredicate>; + SchedWriteVecALU, 0, TruePredicate>; //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Shuffle Instructions @@ -4184,7 +4185,7 @@ def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), // For disassembler only let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, - SchedRW = [WriteVecLogic] in { + SchedRW = [SchedWriteVecLogic.XMM] in { def VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG; def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), @@ -4226,7 +4227,7 @@ let Predicates = [UseSSE2], AddedComplexity = 20 in { // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in // IA32 document. movq xmm1, xmm2 does clear the high bits. // -let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in { +let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in { let AddedComplexity = 15 in def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", @@ -5650,56 +5651,61 @@ let Defs = [EFLAGS], Predicates = [HasAVX] in { def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, - Sched<[WriteVecLogic]>, VEX, VEX_WIG; + Sched<[SchedWriteVecLogic.XMM]>, VEX, VEX_WIG; def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>, - Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_WIG; + Sched<[SchedWriteVecLogic.XMM.Folded, ReadAfterLd]>, + VEX, VEX_WIG; def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>, - Sched<[WriteVecLogic]>, VEX, VEX_L, VEX_WIG; + Sched<[SchedWriteVecLogic.YMM]>, VEX, VEX_L, VEX_WIG; def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>, - Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_L, VEX_WIG; + Sched<[SchedWriteVecLogic.YMM.Folded, ReadAfterLd]>, + VEX, VEX_L, VEX_WIG; } let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>, - Sched<[WriteVecLogic]>; + Sched<[SchedWriteVecLogic.XMM]>; def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "ptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>, - Sched<[WriteVecLogicLd, ReadAfterLd]>; + Sched<[SchedWriteVecLogic.XMM.Folded, ReadAfterLd]>; } // The bit test instructions below are AVX only multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC, - X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> { + X86MemOperand x86memop, PatFrag mem_frag, ValueType vt, + X86FoldableSchedWrite sched> { def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, - Sched<[WriteVecLogic]>, VEX; + Sched<[sched]>, VEX; def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>, - Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX; + Sched<[sched.Folded, ReadAfterLd]>, VEX; } let Defs = [EFLAGS], Predicates = [HasAVX] in { let ExeDomain = SSEPackedSingle in { -defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32>; -defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32>, - VEX_L; +defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32, + SchedWriteVecLogic.XMM>; +defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32, + SchedWriteVecLogic.YMM>, VEX_L; } let ExeDomain = SSEPackedDouble in { -defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64>; -defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64>, - VEX_L; +defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64, + SchedWriteVecLogic.XMM>; +defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64, + SchedWriteVecLogic.YMM>, VEX_L; } } |