diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-04-13 14:36:59 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-04-13 14:36:59 +0000 |
| commit | 21e89795cc66c4db24ff9729ec27365734b137ff (patch) | |
| tree | cd98b4067dfca5f076c10e340dd57afa767638da /llvm/lib/Target | |
| parent | e0c7868dedea85afc8cd8e5b8151011820cdfbe6 (diff) | |
| download | bcm5719-llvm-21e89795cc66c4db24ff9729ec27365734b137ff.tar.gz bcm5719-llvm-21e89795cc66c4db24ff9729ec27365734b137ff.zip | |
[X86] Remove remaining OpndItins/SizeItins from all instruction defs (PR37093)
llvm-svn: 330022
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 2631 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 385 |
2 files changed, 1301 insertions, 1715 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index d958444c2d7..d6c39395b6b 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -482,7 +482,7 @@ multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From, X86VectorVTInfo To, SDPatternOperator vinsert_insert, SDPatternOperator vinsert_for_mask, - OpndItins itins> { + X86FoldableSchedWrite sched> { let hasSideEffects = 0, ExeDomain = To.ExeDomain in { defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst), (ins To.RC:$src1, From.RC:$src2, u8imm:$src3), @@ -494,7 +494,7 @@ multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From, (vinsert_for_mask:$src3 (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm))>, - AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>; + AVX512AIi8Base, EVEX_4V, Sched<[sched]>; let mayLoad = 1 in defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst), (ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3), @@ -507,7 +507,7 @@ multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From, (From.VT (bitconvert (From.LdFrag addr:$src2))), (iPTR imm))>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<From.EltSize, From.CD8TupleForm>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } @@ -515,8 +515,8 @@ multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From, multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, X86VectorVTInfo To, SDPatternOperator vinsert_insert, - OpndItins itins> : - vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, itins>; + X86FoldableSchedWrite sched> : + vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert, sched>; multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From, X86VectorVTInfo To, PatFrag vinsert_insert, @@ -540,30 +540,30 @@ multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From, multiclass vinsert_for_type<ValueType EltVT32, int Opcode128, ValueType EltVT64, int Opcode256, - OpndItins itins> { + X86FoldableSchedWrite sched> { let Predicates = [HasVLX] in defm NAME # "32x4Z256" : vinsert_for_size<Opcode128, X86VectorVTInfo< 4, EltVT32, VR128X>, X86VectorVTInfo< 8, EltVT32, VR256X>, - vinsert128_insert, itins>, EVEX_V256; + vinsert128_insert, sched>, EVEX_V256; defm NAME # "32x4Z" : vinsert_for_size<Opcode128, X86VectorVTInfo< 4, EltVT32, VR128X>, X86VectorVTInfo<16, EltVT32, VR512>, - vinsert128_insert, itins>, EVEX_V512; + vinsert128_insert, sched>, EVEX_V512; defm NAME # "64x4Z" : vinsert_for_size<Opcode256, X86VectorVTInfo< 4, EltVT64, VR256X>, X86VectorVTInfo< 8, EltVT64, VR512>, - vinsert256_insert, itins>, VEX_W, EVEX_V512; + vinsert256_insert, sched>, VEX_W, EVEX_V512; // Even with DQI we'd like to only use these instructions for masking. let Predicates = [HasVLX, HasDQI] in defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128, X86VectorVTInfo< 2, EltVT64, VR128X>, X86VectorVTInfo< 4, EltVT64, VR256X>, - null_frag, vinsert128_insert, itins>, + null_frag, vinsert128_insert, sched>, VEX_W, EVEX_V256; // Even with DQI we'd like to only use these instructions for masking. @@ -571,29 +571,20 @@ multiclass vinsert_for_type<ValueType EltVT32, int Opcode128, defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128, X86VectorVTInfo< 2, EltVT64, VR128X>, X86VectorVTInfo< 8, EltVT64, VR512>, - null_frag, vinsert128_insert, itins>, + null_frag, vinsert128_insert, sched>, VEX_W, EVEX_V512; defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256, X86VectorVTInfo< 8, EltVT32, VR256X>, X86VectorVTInfo<16, EltVT32, VR512>, - null_frag, vinsert256_insert, itins>, + null_frag, vinsert256_insert, sched>, EVEX_V512; } } -// FIXME: Is there a better scheduler itinerary for VINSERTF/VINSERTI? -let Sched = WriteFShuffle256 in -def AVX512_VINSERTF : OpndItins< - NoItinerary, NoItinerary ->; -let Sched = WriteShuffle256 in -def AVX512_VINSERTI : OpndItins< - NoItinerary, NoItinerary ->; - -defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, AVX512_VINSERTF>; -defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, AVX512_VINSERTI>; +// FIXME: Is there a better scheduler class for VINSERTF/VINSERTI? +defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a, WriteFShuffle256>; +defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a, WriteShuffle256>; // Codegen pattern with the alternative types, // Even with AVX512DQ we'll still use these for unmasked operations. @@ -1710,18 +1701,8 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", //===----------------------------------------------------------------------===// // -- VPERMI2 - 3 source operands form -- -let Sched = WriteFVarShuffle256 in -def AVX512_PERM2_F : OpndItins< - NoItinerary, NoItinerary ->; - -let Sched = WriteVarShuffle256 in -def AVX512_PERM2_I : OpndItins< - NoItinerary, NoItinerary ->; - -multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, OpndItins itins, - X86VectorVTInfo _> { +multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { // The index operand in the pattern should really be an integer type. However, // if we do that and it happens to come from a bitcast, then it becomes @@ -1732,19 +1713,19 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { (ins _.RC:$src2, _.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3)), 1>, - EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>; + EVEX_4V, AVX5128IBase, Sched<[sched]>; defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>, - EVEX_4V, AVX5128IBase, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>; } } -multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, OpndItins itins, - X86VectorVTInfo _> { +multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3), @@ -1753,67 +1734,68 @@ multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, OpndItins itins, (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>, AVX5128IBase, EVEX_4V, EVEX_B, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } -multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, OpndItins itins, +multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> { - defm NAME: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>, - avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512; + defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512>, + avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info512>, EVEX_V512; let Predicates = [HasVLX] in { - defm NAME#128: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>, - avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128; - defm NAME#256: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>, - avx512_perm_i_mb<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256; + defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128>, + avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info128>, EVEX_V128; + defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256>, + avx512_perm_i_mb<opc, OpcodeStr, sched, VTInfo.info256>, EVEX_V256; } } multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr, - OpndItins itins, + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, Predicate Prd> { let Predicates = [Prd] in - defm NAME: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512; + defm NAME: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info512>, EVEX_V512; let Predicates = [Prd, HasVLX] in { - defm NAME#128: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128; - defm NAME#256: avx512_perm_i<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256; + defm NAME#128: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info128>, EVEX_V128; + defm NAME#256: avx512_perm_i<opc, OpcodeStr, sched, VTInfo.info256>, EVEX_V256; } } -defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", AVX512_PERM2_I, +defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", AVX512_PERM2_I, +defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", AVX512_PERM2_I, - avx512vl_i16_info, HasBWI>, - VEX_W, EVEX_CD8<16, CD8VF>; -defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", AVX512_PERM2_I, - avx512vl_i8_info, HasVBMI>, - EVEX_CD8<8, CD8VF>; -defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", AVX512_PERM2_F, +defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256, + avx512vl_i16_info, HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>; +defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256, + avx512vl_i8_info, HasVBMI>, EVEX_CD8<8, CD8VF>; +defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256, avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", AVX512_PERM2_F, +defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256, avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; // VPERMT2 -multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, OpndItins itins, +multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, + X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo IdxVT> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins IdxVT.RC:$src2, _.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>, - EVEX_4V, AVX5128IBase, Sched<[itins.Sched]>; + EVEX_4V, AVX5128IBase, Sched<[sched]>; defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins IdxVT.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, (bitconvert (_.LdFrag addr:$src3)))), 1>, - EVEX_4V, AVX5128IBase, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>; } } -multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, OpndItins itins, +multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, + X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo IdxVT> { let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), @@ -1823,114 +1805,105 @@ multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr, OpndItins itins, (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>, AVX5128IBase, EVEX_4V, EVEX_B, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } -multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, OpndItins itins, +multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr, + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, AVX512VLVectorVTInfo ShuffleMask> { - defm NAME: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info512, + defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512, ShuffleMask.info512>, - avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info512, + avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info512, ShuffleMask.info512>, EVEX_V512; let Predicates = [HasVLX] in { - defm NAME#128: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info128, + defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128, ShuffleMask.info128>, - avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info128, + avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info128, ShuffleMask.info128>, EVEX_V128; - defm NAME#256: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info256, + defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256, ShuffleMask.info256>, - avx512_perm_t_mb<opc, OpcodeStr, itins, VTInfo.info256, + avx512_perm_t_mb<opc, OpcodeStr, sched, VTInfo.info256, ShuffleMask.info256>, EVEX_V256; } } -multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr, OpndItins itins, - AVX512VLVectorVTInfo VTInfo, - AVX512VLVectorVTInfo Idx, - Predicate Prd> { +multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr, + X86FoldableSchedWrite sched, + AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo Idx, Predicate Prd> { let Predicates = [Prd] in - defm NAME: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info512, + defm NAME: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info512, Idx.info512>, EVEX_V512; let Predicates = [Prd, HasVLX] in { - defm NAME#128: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info128, + defm NAME#128: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info128, Idx.info128>, EVEX_V128; - defm NAME#256: avx512_perm_t<opc, OpcodeStr, itins, VTInfo.info256, + defm NAME#256: avx512_perm_t<opc, OpcodeStr, sched, VTInfo.info256, Idx.info256>, EVEX_V256; } } -defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", AVX512_PERM2_I, +defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256, avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", AVX512_PERM2_I, +defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256, avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", AVX512_PERM2_I, +defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256, avx512vl_i16_info, avx512vl_i16_info, HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>; -defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", AVX512_PERM2_I, +defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256, avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, EVEX_CD8<8, CD8VF>; -defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", AVX512_PERM2_F, +defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256, avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; -defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", AVX512_PERM2_F, +defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256, avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 - BLEND using mask // -let Sched = WriteFVarBlend in -def AVX512_BLENDM : OpndItins< - NoItinerary, NoItinerary ->; - -let Sched = WriteVarBlend in -def AVX512_PBLENDM : OpndItins< - NoItinerary, NoItinerary ->; - -multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, OpndItins itins, - X86VectorVTInfo _> { +multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr, + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>, - EVEX_4V, Sched<[itins.Sched]>; + EVEX_4V, Sched<[sched]>; def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), - []>, EVEX_4V, EVEX_K, Sched<[itins.Sched]>; + []>, EVEX_4V, EVEX_K, Sched<[sched]>; def rrkz : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"), - []>, EVEX_4V, EVEX_KZ, Sched<[itins.Sched]>; + []>, EVEX_4V, EVEX_KZ, Sched<[sched]>; let mayLoad = 1 in { def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"), []>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), []>, EVEX_4V, EVEX_K, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; def rmkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"), []>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } } -multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, OpndItins itins, - X86VectorVTInfo _> { +multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr, + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let mayLoad = 1, hasSideEffects = 0 in { def rmbk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), @@ -1938,7 +1911,7 @@ multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, OpndItins itins, "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, "}"), []>, EVEX_4V, EVEX_K, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; def rmbkz : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), @@ -1946,7 +1919,7 @@ multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, OpndItins itins, "\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|", "$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>, EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), @@ -1954,42 +1927,42 @@ multiclass avx512_blendmask_rmb<bits<8> opc, string OpcodeStr, OpndItins itins, "\t{${src2}", _.BroadcastStr, ", $src1, $dst|", "$dst, $src1, ${src2}", _.BroadcastStr, "}"), []>, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } -multiclass blendmask_dq <bits<8> opc, string OpcodeStr, OpndItins itins, - AVX512VLVectorVTInfo VTInfo> { - defm Z : avx512_blendmask <opc, OpcodeStr, itins, VTInfo.info512>, - avx512_blendmask_rmb <opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512; +multiclass blendmask_dq<bits<8> opc, string OpcodeStr, + X86FoldableSchedWrite sched, + AVX512VLVectorVTInfo VTInfo> { + defm Z : WriteFVarBlendask <opc, OpcodeStr, sched, VTInfo.info512>, + WriteFVarBlendask_rmb <opc, OpcodeStr, sched, VTInfo.info512>, EVEX_V512; let Predicates = [HasVLX] in { - defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>, - avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256; - defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>, - avx512_blendmask_rmb<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128; + defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched, VTInfo.info256>, + WriteFVarBlendask_rmb<opc, OpcodeStr, sched, VTInfo.info256>, EVEX_V256; + defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched, VTInfo.info128>, + WriteFVarBlendask_rmb<opc, OpcodeStr, sched, VTInfo.info128>, EVEX_V128; } } -multiclass blendmask_bw <bits<8> opc, string OpcodeStr, OpndItins itins, - AVX512VLVectorVTInfo VTInfo> { +multiclass blendmask_bw<bits<8> opc, string OpcodeStr, + X86FoldableSchedWrite sched, + AVX512VLVectorVTInfo VTInfo> { let Predicates = [HasBWI] in - defm Z : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info512>, EVEX_V512; + defm Z : WriteFVarBlendask<opc, OpcodeStr, sched, VTInfo.info512>, EVEX_V512; let Predicates = [HasBWI, HasVLX] in { - defm Z256 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info256>, EVEX_V256; - defm Z128 : avx512_blendmask<opc, OpcodeStr, itins, VTInfo.info128>, EVEX_V128; + defm Z256 : WriteFVarBlendask<opc, OpcodeStr, sched, VTInfo.info256>, EVEX_V256; + defm Z128 : WriteFVarBlendask<opc, OpcodeStr, sched, VTInfo.info128>, EVEX_V128; } } - -defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", AVX512_BLENDM, avx512vl_f32_info>; -defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", AVX512_BLENDM, avx512vl_f64_info>, VEX_W; -defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", AVX512_PBLENDM, avx512vl_i32_info>; -defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", AVX512_PBLENDM, avx512vl_i64_info>, VEX_W; -defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", AVX512_PBLENDM, avx512vl_i8_info>; -defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", AVX512_PBLENDM, avx512vl_i16_info>, VEX_W; - +defm VBLENDMPS : blendmask_dq <0x65, "vblendmps", WriteFVarBlend, avx512vl_f32_info>; +defm VBLENDMPD : blendmask_dq <0x65, "vblendmpd", WriteFVarBlend, avx512vl_f64_info>, VEX_W; +defm VPBLENDMD : blendmask_dq <0x64, "vpblendmd", WriteVarBlend, avx512vl_i32_info>; +defm VPBLENDMQ : blendmask_dq <0x64, "vpblendmq", WriteVarBlend, avx512vl_i64_info>, VEX_W; +defm VPBLENDMB : blendmask_bw <0x66, "vpblendmb", WriteVarBlend, avx512vl_i8_info>; +defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", WriteVarBlend, avx512vl_i16_info>, VEX_W; //===----------------------------------------------------------------------===// // Compare Instructions @@ -1998,7 +1971,7 @@ defm VPBLENDMW : blendmask_bw <0x66, "vpblendmw", AVX512_PBLENDM, avx512vl_i16_i // avx512_cmp_scalar - AVX512 CMPSS and CMPSD multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd, - OpndItins itins> { + X86FoldableSchedWrite sched> { defm rr_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), @@ -2006,7 +1979,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), - imm:$cc)>, EVEX_4V, Sched<[itins.Sched]>; + imm:$cc)>, EVEX_4V, Sched<[sched]>; let mayLoad = 1 in defm rm_Int : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), @@ -2015,7 +1988,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2, imm:$cc)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; defm rrb_Int : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), @@ -2026,7 +1999,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd, (_.VT _.RC:$src2), imm:$cc, (i32 FROUND_NO_EXC))>, - EVEX_4V, EVEX_B, Sched<[itins.Sched]>; + EVEX_4V, EVEX_B, Sched<[sched]>; // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, @@ -2034,7 +2007,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd, (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V, - Sched<[itins.Sched]>; + Sched<[sched]>; let mayLoad = 1 in defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), @@ -2042,14 +2015,14 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd, "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc">, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; defm rrb_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">, - EVEX_4V, EVEX_B, Sched<[itins.Sched]>; + EVEX_4V, EVEX_B, Sched<[sched]>; }// let isAsmParserOnly = 1, hasSideEffects = 0 let isCodeGenOnly = 1 in { @@ -2061,7 +2034,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd, [(set _.KRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2, imm:$cc))]>, - EVEX_4V, Sched<[itins.Sched]>; + EVEX_4V, Sched<[sched]>; def rm : AVX512Ii8<0xC2, MRMSrcMem, (outs _.KRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2, AVXCC:$cc), @@ -2071,33 +2044,34 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd, (_.ScalarLdFrag addr:$src2), imm:$cc))]>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VT1>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } let Predicates = [HasAVX512] in { let ExeDomain = SSEPackedSingle in defm VCMPSSZ : avx512_cmp_scalar<f32x_info, X86cmpms, X86cmpmsRnd, - SSE_ALU_F32S>, AVX512XSIi8Base; + WriteFAdd>, AVX512XSIi8Base; let ExeDomain = SSEPackedDouble in defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsRnd, - SSE_ALU_F64S>, AVX512XDIi8Base, VEX_W; + WriteFAdd>, AVX512XDIi8Base, VEX_W; } multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode, - OpndItins itins, X86VectorVTInfo _, bit IsCommutable> { + X86FoldableSchedWrite sched, X86VectorVTInfo _, + bit IsCommutable> { let isCommutable = IsCommutable in def rr : AVX512BI<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))]>, - EVEX_4V, Sched<[itins.Sched]>; + EVEX_4V, Sched<[sched]>; def rm : AVX512BI<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2)))))]>, - EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; let isCommutable = IsCommutable in def rrk : AVX512BI<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2), @@ -2105,7 +2079,7 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode, "$dst {${mask}}, $src1, $src2}"), [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))))]>, - EVEX_4V, EVEX_K, Sched<[itins.Sched]>; + EVEX_4V, EVEX_K, Sched<[sched]>; def rmk : AVX512BI<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst {${mask}}|", @@ -2114,19 +2088,20 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, PatFrag OpNode, (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))))))]>, - EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode, - OpndItins itins, X86VectorVTInfo _, bit IsCommutable> : - avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, _, IsCommutable> { + X86FoldableSchedWrite sched, X86VectorVTInfo _, + bit IsCommutable> : + avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, _, IsCommutable> { def rmb : AVX512BI<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), !strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst", "|$dst, $src1, ${src2}", _.BroadcastStr, "}"), [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (X86VBroadcast (_.ScalarLdFrag addr:$src2))))]>, - EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; def rmbk : AVX512BI<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2), @@ -2138,36 +2113,37 @@ multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, PatFrag OpNode, (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))]>, EVEX_4V, EVEX_K, EVEX_B, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, PatFrag OpNode, - OpndItins itins, AVX512VLVectorVTInfo VTInfo, - Predicate prd, bit IsCommutable = 0> { + X86FoldableSchedWrite sched, + AVX512VLVectorVTInfo VTInfo, Predicate prd, + bit IsCommutable = 0> { let Predicates = [prd] in - defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info512, + defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, VTInfo.info512, IsCommutable>, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info256, + defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, VTInfo.info256, IsCommutable>, EVEX_V256; - defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, itins, VTInfo.info128, + defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, sched, VTInfo.info128, IsCommutable>, EVEX_V128; } } multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr, - PatFrag OpNode, OpndItins itins, + PatFrag OpNode, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, Predicate prd, bit IsCommutable = 0> { let Predicates = [prd] in - defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512, + defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched, VTInfo.info512, IsCommutable>, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256, + defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched, VTInfo.info256, IsCommutable>, EVEX_V256; - defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128, + defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, sched, VTInfo.info128, IsCommutable>, EVEX_V128; } } @@ -2179,37 +2155,37 @@ def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2), def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2), (X86cmpm node:$src1, node:$src2, (i8 6))>; -// FIXME: Is there a better scheduler itinerary for VPCMP? +// FIXME: Is there a better scheduler class for VPCMP? defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c, - SSE_ALU_F32P, avx512vl_i8_info, HasBWI, 1>, + WriteVecALU, avx512vl_i8_info, HasBWI, 1>, EVEX_CD8<8, CD8VF>, VEX_WIG; defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c, - SSE_ALU_F32P, avx512vl_i16_info, HasBWI, 1>, + WriteVecALU, avx512vl_i16_info, HasBWI, 1>, EVEX_CD8<16, CD8VF>, VEX_WIG; defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c, - SSE_ALU_F32P, avx512vl_i32_info, HasAVX512, 1>, + WriteVecALU, avx512vl_i32_info, HasAVX512, 1>, EVEX_CD8<32, CD8VF>; defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c, - SSE_ALU_F32P, avx512vl_i64_info, HasAVX512, 1>, + WriteVecALU, avx512vl_i64_info, HasAVX512, 1>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm, - SSE_ALU_F32P, avx512vl_i8_info, HasBWI>, + WriteVecALU, avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>, VEX_WIG; defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw", X86pcmpgtm, - SSE_ALU_F32P, avx512vl_i16_info, HasBWI>, + WriteVecALU, avx512vl_i16_info, HasBWI>, EVEX_CD8<16, CD8VF>, VEX_WIG; defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd", X86pcmpgtm, - SSE_ALU_F32P, avx512vl_i32_info, HasAVX512>, + WriteVecALU, avx512vl_i32_info, HasAVX512>, EVEX_CD8<32, CD8VF>; defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, - SSE_ALU_F32P, avx512vl_i64_info, HasAVX512>, + WriteVecALU, avx512vl_i64_info, HasAVX512>, T8PD, VEX_W, EVEX_CD8<64, CD8VF>; // Transforms to swizzle an immediate to help matching memory operand in first @@ -2221,7 +2197,7 @@ def CommutePCMPCC : SDNodeXForm<imm, [{ }]>; multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let isCommutable = 1 in def rri : AVX512AIi8<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc), @@ -2229,7 +2205,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc))]>, - EVEX_4V, Sched<[itins.Sched]>; + EVEX_4V, Sched<[sched]>; def rmi : AVX512AIi8<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc), !strconcat("vpcmp${cc}", Suffix, @@ -2237,7 +2213,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode, [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))), imm:$cc))]>, - EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; let isCommutable = 1 in def rrik : AVX512AIi8<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, @@ -2248,7 +2224,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode, [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc)))]>, - EVEX_4V, EVEX_K, Sched<[itins.Sched]>; + EVEX_4V, EVEX_K, Sched<[sched]>; def rmik : AVX512AIi8<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2, AVX512ICC:$cc), @@ -2259,7 +2235,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))), imm:$cc)))]>, - EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { @@ -2267,20 +2243,20 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|", "$dst, $src1, $src2, $cc}"), []>, - EVEX_4V, Sched<[itins.Sched]>; + EVEX_4V, Sched<[sched]>; let mayLoad = 1 in def rmi_alt : AVX512AIi8<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst|", "$dst, $src1, $src2, $cc}"), []>, - EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; def rrik_alt : AVX512AIi8<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2, u8imm:$cc), !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst {${mask}}|", "$dst {${mask}}, $src1, $src2, $cc}"), []>, - EVEX_4V, EVEX_K, Sched<[itins.Sched]>; + EVEX_4V, EVEX_K, Sched<[sched]>; let mayLoad = 1 in def rmik_alt : AVX512AIi8<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2, @@ -2288,7 +2264,7 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode, !strconcat("vpcmp", Suffix, "\t{$cc, $src2, $src1, $dst {${mask}}|", "$dst {${mask}}, $src1, $src2, $cc}"), []>, - EVEX_4V, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; } def : Pat<(OpNode (bitconvert (_.LdFrag addr:$src2)), @@ -2304,8 +2280,8 @@ multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode, } multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> : - avx512_icmp_cc<opc, Suffix, OpNode, itins, _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> : + avx512_icmp_cc<opc, Suffix, OpNode, sched, _> { def rmib : AVX512AIi8<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, AVX512ICC:$cc), @@ -2315,7 +2291,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode, [(set _.KRC:$dst, (OpNode (_.VT _.RC:$src1), (X86VBroadcast (_.ScalarLdFrag addr:$src2)), imm:$cc))]>, - EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; def rmibk : AVX512AIi8<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2, AVX512ICC:$cc), @@ -2326,7 +2302,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (X86VBroadcast (_.ScalarLdFrag addr:$src2)), imm:$cc)))]>, - EVEX_4V, EVEX_K, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0, mayLoad = 1 in { @@ -2336,14 +2312,14 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode, !strconcat("vpcmp", Suffix, "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst|", "$dst, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>, - EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; def rmibk_alt : AVX512AIi8<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$cc), !strconcat("vpcmp", Suffix, "\t{$cc, ${src2}", _.BroadcastStr, ", $src1, $dst {${mask}}|", "$dst {${mask}}, $src1, ${src2}", _.BroadcastStr, ", $cc}"), []>, - EVEX_4V, EVEX_K, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, EVEX_K, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } def : Pat<(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src2)), @@ -2360,64 +2336,64 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, SDNode OpNode, } multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, SDNode OpNode, - OpndItins itins, AVX512VLVectorVTInfo VTInfo, - Predicate prd> { + X86FoldableSchedWrite sched, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { let Predicates = [prd] in - defm Z : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info512>, + defm Z : avx512_icmp_cc<opc, Suffix, OpNode, sched, VTInfo.info512>, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info256>, + defm Z256 : avx512_icmp_cc<opc, Suffix, OpNode, sched, VTInfo.info256>, EVEX_V256; - defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, itins, VTInfo.info128>, + defm Z128 : avx512_icmp_cc<opc, Suffix, OpNode, sched, VTInfo.info128>, EVEX_V128; } } multiclass avx512_icmp_cc_rmb_vl<bits<8> opc, string Suffix, SDNode OpNode, - OpndItins itins, AVX512VLVectorVTInfo VTInfo, - Predicate prd> { + X86FoldableSchedWrite sched, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { let Predicates = [prd] in - defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info512>, + defm Z : avx512_icmp_cc_rmb<opc, Suffix, OpNode, sched, VTInfo.info512>, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info256>, + defm Z256 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, sched, VTInfo.info256>, EVEX_V256; - defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, itins, VTInfo.info128>, + defm Z128 : avx512_icmp_cc_rmb<opc, Suffix, OpNode, sched, VTInfo.info128>, EVEX_V128; } } -// FIXME: Is there a better scheduler itinerary for VPCMP/VPCMPU? -defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, SSE_ALU_F32P, +// FIXME: Is there a better scheduler class for VPCMP/VPCMPU? +defm VPCMPB : avx512_icmp_cc_vl<0x3F, "b", X86cmpm, WriteVecALU, avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>; -defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, SSE_ALU_F32P, +defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86cmpmu, WriteVecALU, avx512vl_i8_info, HasBWI>, EVEX_CD8<8, CD8VF>; -defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, SSE_ALU_F32P, +defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86cmpm, WriteVecALU, avx512vl_i16_info, HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>; -defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, SSE_ALU_F32P, +defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86cmpmu, WriteVecALU, avx512vl_i16_info, HasBWI>, VEX_W, EVEX_CD8<16, CD8VF>; -defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, SSE_ALU_F32P, +defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86cmpm, WriteVecALU, avx512vl_i32_info, HasAVX512>, EVEX_CD8<32, CD8VF>; -defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, SSE_ALU_F32P, +defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86cmpmu, WriteVecALU, avx512vl_i32_info, HasAVX512>, EVEX_CD8<32, CD8VF>; -defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, SSE_ALU_F32P, +defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86cmpm, WriteVecALU, avx512vl_i64_info, HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, SSE_ALU_F32P, +defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86cmpmu, WriteVecALU, avx512vl_i64_info, HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>; -multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> { +multiclass avx512_vcmp_common<X86FoldableSchedWrite sched, X86VectorVTInfo _> { defm rri : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2,AVXCC:$cc), "vcmp${cc}"#_.Suffix, @@ -2425,7 +2401,7 @@ multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> { (X86cmpm (_.VT _.RC:$src1), (_.VT _.RC:$src2), imm:$cc), 1>, - Sched<[itins.Sched]>; + Sched<[sched]>; defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc), @@ -2434,7 +2410,7 @@ multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> { (X86cmpm (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))), imm:$cc)>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), @@ -2445,7 +2421,7 @@ multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> { (X86cmpm (_.VT _.RC:$src1), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), imm:$cc)>, - EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { defm rri_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, @@ -2453,7 +2429,7 @@ multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> { (ins _.RC:$src1, _.RC:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc">, - Sched<[itins.Sched]>; + Sched<[sched]>; let mayLoad = 1 in { defm rmi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, @@ -2461,7 +2437,7 @@ multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> { (ins _.RC:$src1, _.MemOp:$src2, u8imm:$cc), "vcmp"#_.Suffix, "$cc, $src2, $src1", "$src1, $src2, $cc">, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; defm rmbi_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcMem, _, (outs _.KRC:$dst), @@ -2469,7 +2445,7 @@ multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> { "vcmp"#_.Suffix, "$cc, ${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr##", $cc">, - EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } @@ -2500,7 +2476,7 @@ multiclass avx512_vcmp_common<OpndItins itins, X86VectorVTInfo _> { imm:$cc)>; } -multiclass avx512_vcmp_sae<OpndItins itins, X86VectorVTInfo _> { +multiclass avx512_vcmp_sae<X86FoldableSchedWrite sched, X86VectorVTInfo _> { // comparison code form (VCMP[EQ/LT/LE/...] defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _, (outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc), @@ -2510,7 +2486,7 @@ multiclass avx512_vcmp_sae<OpndItins itins, X86VectorVTInfo _> { (_.VT _.RC:$src2), imm:$cc, (i32 FROUND_NO_EXC))>, - EVEX_B, Sched<[itins.Sched]>; + EVEX_B, Sched<[sched]>; let isAsmParserOnly = 1, hasSideEffects = 0 in { defm rrib_alt : AVX512_maskable_cmp_alt<0xC2, MRMSrcReg, _, @@ -2519,25 +2495,25 @@ multiclass avx512_vcmp_sae<OpndItins itins, X86VectorVTInfo _> { "vcmp"#_.Suffix, "$cc, {sae}, $src2, $src1", "$src1, $src2, {sae}, $cc">, - EVEX_B, Sched<[itins.Sched]>; + EVEX_B, Sched<[sched]>; } } -multiclass avx512_vcmp<OpndItins itins, AVX512VLVectorVTInfo _> { +multiclass avx512_vcmp<X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in { - defm Z : avx512_vcmp_common<itins, _.info512>, - avx512_vcmp_sae<itins, _.info512>, EVEX_V512; + defm Z : avx512_vcmp_common<sched, _.info512>, + avx512_vcmp_sae<sched, _.info512>, EVEX_V512; } let Predicates = [HasAVX512,HasVLX] in { - defm Z128 : avx512_vcmp_common<itins, _.info128>, EVEX_V128; - defm Z256 : avx512_vcmp_common<itins, _.info256>, EVEX_V256; + defm Z128 : avx512_vcmp_common<sched, _.info128>, EVEX_V128; + defm Z256 : avx512_vcmp_common<sched, _.info256>, EVEX_V256; } } -defm VCMPPD : avx512_vcmp<SSE_ALU_F64P, avx512vl_f64_info>, +defm VCMPPD : avx512_vcmp<WriteFAdd, avx512vl_f64_info>, AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; -defm VCMPPS : avx512_vcmp<SSE_ALU_F32P, avx512vl_f32_info>, +defm VCMPPS : avx512_vcmp<WriteFAdd, avx512vl_f32_info>, AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; @@ -2557,7 +2533,7 @@ let Predicates = [HasAVX512] in { //handle fpclass instruction mask = op(reg_scalar,imm) // op(mem_scalar,imm) multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _, + X86FoldableSchedWrite sched, X86VectorVTInfo _, Predicate prd> { let Predicates = [prd], ExeDomain = _.ExeDomain in { def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), @@ -2565,7 +2541,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1), (i32 imm:$src2)))]>, - Sched<[itins.Sched]>; + Sched<[sched]>; def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix# @@ -2573,7 +2549,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode (_.VT _.RC:$src1), (i32 imm:$src2))))]>, - EVEX_K, Sched<[itins.Sched]>; + EVEX_K, Sched<[sched]>; def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.IntScalarMemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix## @@ -2581,7 +2557,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set _.KRC:$dst, (OpNode _.ScalarIntMemCPat:$src1, (i32 imm:$src2)))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.IntScalarMemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix## @@ -2589,7 +2565,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode _.ScalarIntMemCPat:$src1, (i32 imm:$src2))))]>, - EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; } } @@ -2597,7 +2573,7 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, // fpclass(reg_vec, mem_vec, imm) // fpclass(reg_vec, broadcast(eltVt), imm) multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _, + X86FoldableSchedWrite sched, X86VectorVTInfo _, string mem, string broadcast>{ let ExeDomain = _.ExeDomain in { def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), @@ -2605,7 +2581,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1), (i32 imm:$src2)))]>, - Sched<[itins.Sched]>; + Sched<[sched]>; def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix# @@ -2613,7 +2589,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set _.KRC:$dst,(and _.KRCWM:$mask, (OpNode (_.VT _.RC:$src1), (i32 imm:$src2))))]>, - EVEX_K, Sched<[itins.Sched]>; + EVEX_K, Sched<[sched]>; def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.MemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix##mem# @@ -2621,7 +2597,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set _.KRC:$dst,(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i32 imm:$src2)))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix##mem# @@ -2629,7 +2605,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set _.KRC:$dst, (and _.KRCWM:$mask, (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i32 imm:$src2))))]>, - EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.ScalarMemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"## @@ -2639,7 +2615,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src1))), (i32 imm:$src2)))]>, - EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst), (ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"## @@ -2649,40 +2625,40 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src1))), (i32 imm:$src2))))]>, - EVEX_B, EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_B, EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_vector_fpclass_all<string OpcodeStr, AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, - OpndItins itins, Predicate prd, + X86FoldableSchedWrite sched, Predicate prd, string broadcast>{ let Predicates = [prd] in { - defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins, + defm Z : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched, _.info512, "{z}", broadcast>, EVEX_V512; } let Predicates = [prd, HasVLX] in { - defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins, + defm Z128 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched, _.info128, "{x}", broadcast>, EVEX_V128; - defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, itins, + defm Z256 : avx512_vector_fpclass<opc, OpcodeStr, OpNode, sched, _.info256, "{y}", broadcast>, EVEX_V256; } } -// FIXME: Is there a better scheduler itinerary for VFPCLASS? +// FIXME: Is there a better scheduler class for VFPCLASS? multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec, bits<8> opcScalar, SDNode VecOpNode, SDNode ScalarOpNode, Predicate prd>{ defm PS : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f32_info, opcVec, - VecOpNode, SSE_ALU_F32P, prd, "{l}">, + VecOpNode, WriteFAdd, prd, "{l}">, EVEX_CD8<32, CD8VF>; defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec, - VecOpNode, SSE_ALU_F64P, prd, "{q}">, + VecOpNode, WriteFAdd, prd, "{q}">, EVEX_CD8<64, CD8VF> , VEX_W; defm SS : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode, - SSE_ALU_F32S, f32x_info, prd>, + WriteFAdd, f32x_info, prd>, EVEX_CD8<32, CD8VT1>; defm SD : avx512_scalar_fpclass<opcScalar, OpcodeStr, ScalarOpNode, - SSE_ALU_F64S, f64x_info, prd>, + WriteFAdd, f64x_info, prd>, EVEX_CD8<64, CD8VT1>, VEX_W; } @@ -2823,27 +2799,28 @@ let Predicates = [HasAVX512] in { // - KNOT multiclass avx512_mask_unop<bits<8> opc, string OpcodeStr, RegisterClass KRC, SDPatternOperator OpNode, - OpndItins itins, Predicate prd> { + X86FoldableSchedWrite sched, Predicate prd> { let Predicates = [prd] in def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set KRC:$dst, (OpNode KRC:$src))]>, - Sched<[itins.Sched]>; + Sched<[sched]>; } multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr, - SDPatternOperator OpNode, OpndItins itins> { + SDPatternOperator OpNode, + X86FoldableSchedWrite sched> { defm B : avx512_mask_unop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode, - itins, HasDQI>, VEX, PD; + sched, HasDQI>, VEX, PD; defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode, - itins, HasAVX512>, VEX, PS; + sched, HasAVX512>, VEX, PS; defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode, - itins, HasBWI>, VEX, PD, VEX_W; + sched, HasBWI>, VEX, PD, VEX_W; defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, - itins, HasBWI>, VEX, PS, VEX_W; + sched, HasBWI>, VEX, PS, VEX_W; } -defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, SSE_BIT_ITINS_P>; +defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot, WriteVecLogic>; // KNL does not support KMOVB, 8-bit mask is promoted to 16-bit let Predicates = [HasAVX512, NoDQI] in @@ -2859,26 +2836,28 @@ def : Pat<(vnot VK2:$src), // - KAND, KANDN, KOR, KXNOR, KXOR multiclass avx512_mask_binop<bits<8> opc, string OpcodeStr, RegisterClass KRC, SDPatternOperator OpNode, - OpndItins itins, Predicate prd, bit IsCommutable> { + X86FoldableSchedWrite sched, Predicate prd, + bit IsCommutable> { let Predicates = [prd], isCommutable = IsCommutable in def rr : I<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set KRC:$dst, (OpNode KRC:$src1, KRC:$src2))]>, - Sched<[itins.Sched]>; + Sched<[sched]>; } multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr, - SDPatternOperator OpNode, OpndItins itins, - bit IsCommutable, Predicate prdW = HasAVX512> { + SDPatternOperator OpNode, + X86FoldableSchedWrite sched, bit IsCommutable, + Predicate prdW = HasAVX512> { defm B : avx512_mask_binop<opc, !strconcat(OpcodeStr, "b"), VK8, OpNode, - itins, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD; + sched, HasDQI, IsCommutable>, VEX_4V, VEX_L, PD; defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode, - itins, prdW, IsCommutable>, VEX_4V, VEX_L, PS; + sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS; defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode, - itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD; + sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD; defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode, - itins, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS; + sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS; } def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>; @@ -2887,12 +2866,12 @@ def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>; def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>; def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>; -defm KAND : avx512_mask_binop_all<0x41, "kand", and, SSE_BIT_ITINS_P, 1>; -defm KOR : avx512_mask_binop_all<0x45, "kor", or, SSE_BIT_ITINS_P, 1>; -defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, SSE_BIT_ITINS_P, 1>; -defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, SSE_BIT_ITINS_P, 1>; -defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, SSE_BIT_ITINS_P, 0>; -defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, SSE_BIT_ITINS_P, 1, HasDQI>; +defm KAND : avx512_mask_binop_all<0x41, "kand", and, WriteVecLogic, 1>; +defm KOR : avx512_mask_binop_all<0x45, "kor", or, WriteVecLogic, 1>; +defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, WriteVecLogic, 1>; +defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, WriteVecLogic, 1>; +defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, WriteVecLogic, 0>; +defm KADD : avx512_mask_binop_all<0x4A, "kadd", X86kadd, WriteVecLogic, 1, HasDQI>; multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode, Instruction Inst> { @@ -2927,13 +2906,14 @@ defm : avx512_binop_pat<xor, xor, KXORWrr>; // Mask unpacking multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT, - RegisterClass KRCSrc, OpndItins itins, Predicate prd> { + RegisterClass KRCSrc, X86FoldableSchedWrite sched, + Predicate prd> { let Predicates = [prd] in { let hasSideEffects = 0 in def rr : I<0x4b, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src1, KRC:$src2), "kunpck"#Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - VEX_4V, VEX_L, Sched<[itins.Sched]>; + VEX_4V, VEX_L, Sched<[sched]>; def : Pat<(VT (concat_vectors KRCSrc:$src1, KRCSrc:$src2)), (!cast<Instruction>(NAME##rr) @@ -2942,63 +2922,65 @@ multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT, } } -defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, SSE_UNPCK, HasAVX512>, PD; -defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, SSE_UNPCK, HasBWI>, PS; -defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, SSE_UNPCK, HasBWI>, PS, VEX_W; +defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, WriteShuffle, HasAVX512>, PD; +defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, WriteShuffle, HasBWI>, PS; +defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, WriteShuffle, HasBWI>, PS, VEX_W; // Mask bit testing multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC, - SDNode OpNode, OpndItins itins, Predicate prd> { + SDNode OpNode, X86FoldableSchedWrite sched, + Predicate prd> { let Predicates = [prd], Defs = [EFLAGS] in def rr : I<opc, MRMSrcReg, (outs), (ins KRC:$src1, KRC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), [(set EFLAGS, (OpNode KRC:$src1, KRC:$src2))]>, - Sched<[itins.Sched]>; + Sched<[sched]>; } multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, Predicate prdW = HasAVX512> { - defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, itins, HasDQI>, + X86FoldableSchedWrite sched, + Predicate prdW = HasAVX512> { + defm B : avx512_mask_testop<opc, OpcodeStr#"b", VK8, OpNode, sched, HasDQI>, VEX, PD; - defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, itins, prdW>, + defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>, VEX, PS; - defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, itins, HasBWI>, + defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>, VEX, PS, VEX_W; - defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, itins, HasBWI>, + defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>, VEX, PD, VEX_W; } -defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, SSE_PTEST>; -defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, SSE_PTEST, HasDQI>; +defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest, WriteVecLogic>; +defm KTEST : avx512_mask_testop_w<0x99, "ktest", X86ktest, WriteVecLogic, HasDQI>; // Mask shift multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC, - SDNode OpNode, OpndItins itins> { + SDNode OpNode, X86FoldableSchedWrite sched> { let Predicates = [HasAVX512] in def ri : Ii8<opc, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src, u8imm:$imm), !strconcat(OpcodeStr, "\t{$imm, $src, $dst|$dst, $src, $imm}"), [(set KRC:$dst, (OpNode KRC:$src, (i8 imm:$imm)))]>, - Sched<[itins.Sched]>; + Sched<[sched]>; } multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr, - SDNode OpNode, OpndItins itins> { + SDNode OpNode, X86FoldableSchedWrite sched> { defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode, - itins>, VEX, TAPD, VEX_W; + sched>, VEX, TAPD, VEX_W; let Predicates = [HasDQI] in defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode, - itins>, VEX, TAPD; + sched>, VEX, TAPD; let Predicates = [HasBWI] in { defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode, - itins>, VEX, TAPD, VEX_W; + sched>, VEX, TAPD, VEX_W; defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode, - itins>, VEX, TAPD; + sched>, VEX, TAPD; } } -defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, SSE_PSHUF>; -defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, SSE_PSHUF>; +defm KSHIFTL : avx512_mask_shiftop_w<0x32, 0x33, "kshiftl", X86kshiftl, WriteShuffle>; +defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, WriteShuffle>; multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr, X86VectorVTInfo Narrow, @@ -4319,14 +4301,14 @@ let Predicates = [HasVLX], AddedComplexity = 400 in { // AVX-512 - Integer arithmetic // multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _, OpndItins itins, + X86VectorVTInfo _, X86FoldableSchedWrite sched, bit IsCommutable = 0> { defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable>, AVX512BIBase, EVEX_4V, - Sched<[itins.Sched]>; + Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, @@ -4334,13 +4316,13 @@ multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.VT (OpNode _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))))>, AVX512BIBase, EVEX_4V, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _, OpndItins itins, + X86VectorVTInfo _, X86FoldableSchedWrite sched, bit IsCommutable = 0> : - avx512_binop_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> { + avx512_binop_rm<opc, OpcodeStr, OpNode, _, sched, IsCommutable> { defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, "${src2}"##_.BroadcastStr##", $src1", @@ -4349,101 +4331,105 @@ multiclass avx512_binop_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, (X86VBroadcast (_.ScalarLdFrag addr:$src2))))>, AVX512BIBase, EVEX_4V, EVEX_B, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_binop_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTInfo, OpndItins itins, - Predicate prd, bit IsCommutable = 0> { + AVX512VLVectorVTInfo VTInfo, + X86FoldableSchedWrite sched, Predicate prd, + bit IsCommutable = 0> { let Predicates = [prd] in - defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, itins, + defm Z : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info512, sched, IsCommutable>, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, itins, + defm Z256 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info256, sched, IsCommutable>, EVEX_V256; - defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, itins, + defm Z128 : avx512_binop_rm<opc, OpcodeStr, OpNode, VTInfo.info128, sched, IsCommutable>, EVEX_V128; } } multiclass avx512_binop_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTInfo, OpndItins itins, - Predicate prd, bit IsCommutable = 0> { + AVX512VLVectorVTInfo VTInfo, + X86FoldableSchedWrite sched, Predicate prd, + bit IsCommutable = 0> { let Predicates = [prd] in - defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins, + defm Z : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, sched, IsCommutable>, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins, + defm Z256 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, sched, IsCommutable>, EVEX_V256; - defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins, + defm Z128 : avx512_binop_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, sched, IsCommutable>, EVEX_V128; } } multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, Predicate prd, + X86FoldableSchedWrite sched, Predicate prd, bit IsCommutable = 0> { defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info, - itins, prd, IsCommutable>, - VEX_W, EVEX_CD8<64, CD8VF>; + sched, prd, IsCommutable>, + VEX_W, EVEX_CD8<64, CD8VF>; } multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, Predicate prd, + X86FoldableSchedWrite sched, Predicate prd, bit IsCommutable = 0> { defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info, - itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>; + sched, prd, IsCommutable>, EVEX_CD8<32, CD8VF>; } multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, Predicate prd, + X86FoldableSchedWrite sched, Predicate prd, bit IsCommutable = 0> { defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info, - itins, prd, IsCommutable>, EVEX_CD8<16, CD8VF>, - VEX_WIG; + sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>, + VEX_WIG; } multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, Predicate prd, + X86FoldableSchedWrite sched, Predicate prd, bit IsCommutable = 0> { defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info, - itins, prd, IsCommutable>, EVEX_CD8<8, CD8VF>, - VEX_WIG; + sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>, + VEX_WIG; } multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr, - SDNode OpNode, OpndItins itins, Predicate prd, - bit IsCommutable = 0> { - defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, itins, prd, + SDNode OpNode, X86FoldableSchedWrite sched, + Predicate prd, bit IsCommutable = 0> { + defm Q : avx512_binop_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, sched, prd, IsCommutable>; - defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, itins, prd, + defm D : avx512_binop_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, sched, prd, IsCommutable>; } multiclass avx512_binop_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr, - SDNode OpNode, OpndItins itins, Predicate prd, - bit IsCommutable = 0> { - defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, itins, prd, + SDNode OpNode, X86FoldableSchedWrite sched, + Predicate prd, bit IsCommutable = 0> { + defm W : avx512_binop_rm_vl_w<opc_w, OpcodeStr#"w", OpNode, sched, prd, IsCommutable>; - defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, itins, prd, + defm B : avx512_binop_rm_vl_b<opc_b, OpcodeStr#"b", OpNode, sched, prd, IsCommutable>; } multiclass avx512_binop_rm_vl_all<bits<8> opc_b, bits<8> opc_w, bits<8> opc_d, bits<8> opc_q, string OpcodeStr, SDNode OpNode, - OpndItins itins, bit IsCommutable = 0> { + X86FoldableSchedWrite sched, + bit IsCommutable = 0> { defm NAME : avx512_binop_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, - itins, HasAVX512, IsCommutable>, + sched, HasAVX512, IsCommutable>, avx512_binop_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, - itins, HasBWI, IsCommutable>; + sched, HasBWI, IsCommutable>; } -multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins, +multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, + X86FoldableSchedWrite sched, SDNode OpNode,X86VectorVTInfo _Src, X86VectorVTInfo _Dst, X86VectorVTInfo _Brdct, bit IsCommutable = 0> { @@ -4454,14 +4440,14 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins, (_Src.VT _Src.RC:$src1), (_Src.VT _Src.RC:$src2))), IsCommutable>, - AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>; + AVX512BIBase, EVEX_4V, Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert (_Src.LdFrag addr:$src2))))>, AVX512BIBase, EVEX_4V, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2), @@ -4472,67 +4458,69 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins, (_Brdct.VT (X86VBroadcast (_Brdct.ScalarLdFrag addr:$src2))))))>, AVX512BIBase, EVEX_4V, EVEX_B, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } defm VPADD : avx512_binop_rm_vl_all<0xFC, 0xFD, 0xFE, 0xD4, "vpadd", add, - SSE_INTALU_ITINS_P, 1>; + WriteVecALU, 1>; defm VPSUB : avx512_binop_rm_vl_all<0xF8, 0xF9, 0xFA, 0xFB, "vpsub", sub, - SSE_INTALU_ITINS_P, 0>; + WriteVecALU, 0>; defm VPADDS : avx512_binop_rm_vl_bw<0xEC, 0xED, "vpadds", X86adds, - SSE_INTALU_ITINS_P, HasBWI, 1>; + WriteVecALU, HasBWI, 1>; defm VPSUBS : avx512_binop_rm_vl_bw<0xE8, 0xE9, "vpsubs", X86subs, - SSE_INTALU_ITINS_P, HasBWI, 0>; + WriteVecALU, HasBWI, 0>; defm VPADDUS : avx512_binop_rm_vl_bw<0xDC, 0xDD, "vpaddus", X86addus, - SSE_INTALU_ITINS_P, HasBWI, 1>; + WriteVecALU, HasBWI, 1>; defm VPSUBUS : avx512_binop_rm_vl_bw<0xD8, 0xD9, "vpsubus", X86subus, - SSE_INTALU_ITINS_P, HasBWI, 0>; + WriteVecALU, HasBWI, 0>; defm VPMULLD : avx512_binop_rm_vl_d<0x40, "vpmulld", mul, - SSE_PMULLD_ITINS, HasAVX512, 1>, T8PD; + WritePMULLD, HasAVX512, 1>, T8PD; defm VPMULLW : avx512_binop_rm_vl_w<0xD5, "vpmullw", mul, - SSE_INTMUL_ITINS_P, HasBWI, 1>; + WriteVecIMul, HasBWI, 1>; defm VPMULLQ : avx512_binop_rm_vl_q<0x40, "vpmullq", mul, - SSE_INTMUL_ITINS_P, HasDQI, 1>, T8PD; -defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, SSE_INTMUL_ITINS_P, + WriteVecIMul, HasDQI, 1>, T8PD; +defm VPMULHW : avx512_binop_rm_vl_w<0xE5, "vpmulhw", mulhs, WriteVecIMul, HasBWI, 1>; -defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, SSE_INTMUL_ITINS_P, +defm VPMULHUW : avx512_binop_rm_vl_w<0xE4, "vpmulhuw", mulhu, WriteVecIMul, HasBWI, 1>; -defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, SSE_INTMUL_ITINS_P, +defm VPMULHRSW : avx512_binop_rm_vl_w<0x0B, "vpmulhrsw", X86mulhrs, WriteVecIMul, HasBWI, 1>, T8PD; defm VPAVG : avx512_binop_rm_vl_bw<0xE0, 0xE3, "vpavg", X86avg, - SSE_INTALU_ITINS_P, HasBWI, 1>; + WriteVecIMul, HasBWI, 1>; defm VPMULDQ : avx512_binop_rm_vl_q<0x28, "vpmuldq", X86pmuldq, - SSE_INTMUL_ITINS_P, HasAVX512, 1>, T8PD; + WriteVecIMul, HasAVX512, 1>, T8PD; defm VPMULUDQ : avx512_binop_rm_vl_q<0xF4, "vpmuludq", X86pmuludq, - SSE_INTMUL_ITINS_P, HasAVX512, 1>; + WriteVecIMul, HasAVX512, 1>; -multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, OpndItins itins, - AVX512VLVectorVTInfo _SrcVTInfo, AVX512VLVectorVTInfo _DstVTInfo, +multiclass avx512_binop_all<bits<8> opc, string OpcodeStr, + X86FoldableSchedWrite sched, + AVX512VLVectorVTInfo _SrcVTInfo, + AVX512VLVectorVTInfo _DstVTInfo, SDNode OpNode, Predicate prd, bit IsCommutable = 0> { let Predicates = [prd] in - defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode, + defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched, OpNode, _SrcVTInfo.info512, _DstVTInfo.info512, v8i64_info, IsCommutable>, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W; let Predicates = [HasVLX, prd] in { - defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode, + defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched, OpNode, _SrcVTInfo.info256, _DstVTInfo.info256, v4i64x_info, IsCommutable>, EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W; - defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, itins, OpNode, + defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched, OpNode, _SrcVTInfo.info128, _DstVTInfo.info128, v2i64x_info, IsCommutable>, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W; } } -defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SSE_INTALU_ITINS_P, +defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", WriteVecALU, avx512vl_i8_info, avx512vl_i8_info, X86multishift, HasVBMI, 0>, T8PD; multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _Src, X86VectorVTInfo _Dst, - OpndItins itins> { + X86FoldableSchedWrite sched> { defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), (ins _Src.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr, @@ -4542,12 +4530,12 @@ multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, (_Src.VT (X86VBroadcast (_Src.ScalarLdFrag addr:$src2))))))>, EVEX_4V, EVEX_B, EVEX_CD8<_Src.EltSize, CD8VF>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,X86VectorVTInfo _Src, - X86VectorVTInfo _Dst, OpndItins itins, + X86VectorVTInfo _Dst, X86FoldableSchedWrite sched, bit IsCommutable = 0> { defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst), (ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr, @@ -4556,44 +4544,44 @@ multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr, (_Src.VT _Src.RC:$src1), (_Src.VT _Src.RC:$src2))), IsCommutable>, - EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[itins.Sched]>; + EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V, Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), (ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert (_Src.LdFrag addr:$src2))))>, EVEX_4V, EVEX_CD8<_Src.EltSize, CD8VF>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_packs_all_i32_i16<bits<8> opc, string OpcodeStr, SDNode OpNode> { let Predicates = [HasBWI] in defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i32_info, - v32i16_info, SSE_PACK>, + v32i16_info, WriteShuffle>, avx512_packs_rmb<opc, OpcodeStr, OpNode, v16i32_info, - v32i16_info, SSE_PACK>, EVEX_V512; + v32i16_info, WriteShuffle>, EVEX_V512; let Predicates = [HasBWI, HasVLX] in { defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i32x_info, - v16i16x_info, SSE_PACK>, + v16i16x_info, WriteShuffle>, avx512_packs_rmb<opc, OpcodeStr, OpNode, v8i32x_info, - v16i16x_info, SSE_PACK>, EVEX_V256; + v16i16x_info, WriteShuffle>, EVEX_V256; defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v4i32x_info, - v8i16x_info, SSE_PACK>, + v8i16x_info, WriteShuffle>, avx512_packs_rmb<opc, OpcodeStr, OpNode, v4i32x_info, - v8i16x_info, SSE_PACK>, EVEX_V128; + v8i16x_info, WriteShuffle>, EVEX_V128; } } multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr, SDNode OpNode> { let Predicates = [HasBWI] in defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, - v64i8_info, SSE_PACK>, EVEX_V512, VEX_WIG; + v64i8_info, WriteShuffle>, EVEX_V512, VEX_WIG; let Predicates = [HasBWI, HasVLX] in { defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info, - v32i8x_info, SSE_PACK>, EVEX_V256, VEX_WIG; + v32i8x_info, WriteShuffle>, EVEX_V256, VEX_WIG; defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info, - v16i8x_info, SSE_PACK>, EVEX_V128, VEX_WIG; + v16i8x_info, WriteShuffle>, EVEX_V128, VEX_WIG; } } @@ -4602,12 +4590,12 @@ multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr, AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> { let Predicates = [HasBWI] in defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512, - _Dst.info512, SSE_PMADD, IsCommutable>, EVEX_V512; + _Dst.info512, WriteVecIMul, IsCommutable>, EVEX_V512; let Predicates = [HasBWI, HasVLX] in { defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256, - _Dst.info256, SSE_PMADD, IsCommutable>, EVEX_V256; + _Dst.info256, WriteVecIMul, IsCommutable>, EVEX_V256; defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128, - _Dst.info128, SSE_PMADD, IsCommutable>, EVEX_V128; + _Dst.info128, WriteVecIMul, IsCommutable>, EVEX_V128; } } @@ -4622,32 +4610,32 @@ defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd, avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG; defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax, - SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; + WriteVecALU, HasBWI, 1>, T8PD; defm VPMAXSW : avx512_binop_rm_vl_w<0xEE, "vpmaxsw", smax, - SSE_INTALU_ITINS_P, HasBWI, 1>; + WriteVecALU, HasBWI, 1>; defm VPMAXS : avx512_binop_rm_vl_dq<0x3D, 0x3D, "vpmaxs", smax, - SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; + WriteVecALU, HasAVX512, 1>, T8PD; defm VPMAXUB : avx512_binop_rm_vl_b<0xDE, "vpmaxub", umax, - SSE_INTALU_ITINS_P, HasBWI, 1>; + WriteVecALU, HasBWI, 1>; defm VPMAXUW : avx512_binop_rm_vl_w<0x3E, "vpmaxuw", umax, - SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; + WriteVecALU, HasBWI, 1>, T8PD; defm VPMAXU : avx512_binop_rm_vl_dq<0x3F, 0x3F, "vpmaxu", umax, - SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; + WriteVecALU, HasAVX512, 1>, T8PD; defm VPMINSB : avx512_binop_rm_vl_b<0x38, "vpminsb", smin, - SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; + WriteVecALU, HasBWI, 1>, T8PD; defm VPMINSW : avx512_binop_rm_vl_w<0xEA, "vpminsw", smin, - SSE_INTALU_ITINS_P, HasBWI, 1>; + WriteVecALU, HasBWI, 1>; defm VPMINS : avx512_binop_rm_vl_dq<0x39, 0x39, "vpmins", smin, - SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; + WriteVecALU, HasAVX512, 1>, T8PD; defm VPMINUB : avx512_binop_rm_vl_b<0xDA, "vpminub", umin, - SSE_INTALU_ITINS_P, HasBWI, 1>; + WriteVecALU, HasBWI, 1>; defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin, - SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; + WriteVecALU, HasBWI, 1>, T8PD; defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin, - SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; + WriteVecALU, HasAVX512, 1>, T8PD; // PMULLQ: Use 512bit version to implement 128/256 bit in case NoVLX. let Predicates = [HasDQI, NoVLX] in { @@ -4714,8 +4702,8 @@ let Predicates = [HasAVX512, NoVLX] in { // be set to null_frag for 32-bit elements. multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, - SDNode OpNodeMsk, OpndItins itins, X86VectorVTInfo _, - bit IsCommutable = 0> { + SDNode OpNodeMsk, X86FoldableSchedWrite sched, + X86VectorVTInfo _, bit IsCommutable = 0> { let hasSideEffects = 0 in defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr, @@ -4725,7 +4713,7 @@ multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr, (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1, _.RC:$src2)))), IsCommutable>, AVX512BIBase, EVEX_4V, - Sched<[itins.Sched]>; + Sched<[sched]>; let hasSideEffects = 0, mayLoad = 1 in defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst), @@ -4736,16 +4724,16 @@ multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr, (_.VT (bitconvert (_.i64VT (OpNodeMsk _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))))))>, AVX512BIBase, EVEX_4V, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } // OpNodeMsk is the OpNode to use where element size is important. So use // for all of the broadcast patterns. multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, - SDNode OpNodeMsk, OpndItins itins, X86VectorVTInfo _, + SDNode OpNodeMsk, X86FoldableSchedWrite sched, X86VectorVTInfo _, bit IsCommutable = 0> : - avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, itins, _, + avx512_logic_rm<opc, OpcodeStr, OpNode, OpNodeMsk, sched, _, IsCommutable> { defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, @@ -4760,55 +4748,55 @@ multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2))))))))>, AVX512BIBase, EVEX_4V, EVEX_B, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, - SDNode OpNodeMsk, OpndItins itins, + SDNode OpNodeMsk, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, bit IsCommutable = 0> { let Predicates = [HasAVX512] in - defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins, + defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched, VTInfo.info512, IsCommutable>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins, + defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched, VTInfo.info256, IsCommutable>, EVEX_V256; - defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, itins, + defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, OpNodeMsk, sched, VTInfo.info128, IsCommutable>, EVEX_V128; } } multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr, - SDNode OpNode, OpndItins itins, + SDNode OpNode, X86FoldableSchedWrite sched, bit IsCommutable = 0> { - defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, itins, + defm Q : avx512_logic_rmb_vl<opc_q, OpcodeStr#"q", OpNode, OpNode, sched, avx512vl_i64_info, IsCommutable>, VEX_W, EVEX_CD8<64, CD8VF>; - defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, itins, + defm D : avx512_logic_rmb_vl<opc_d, OpcodeStr#"d", null_frag, OpNode, sched, avx512vl_i32_info, IsCommutable>, EVEX_CD8<32, CD8VF>; } -defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, SSE_BIT_ITINS_P, 1>; -defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, SSE_BIT_ITINS_P, 1>; -defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, SSE_BIT_ITINS_P, 1>; -defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, SSE_BIT_ITINS_P>; +defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, WriteVecLogic, 1>; +defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or, WriteVecLogic, 1>; +defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor, WriteVecLogic, 1>; +defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp, WriteVecLogic>; //===----------------------------------------------------------------------===// // AVX-512 FP arithmetic //===----------------------------------------------------------------------===// multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, - SDNode OpNode, SDNode VecNode, OpndItins itins, - bit IsCommutable> { + SDNode OpNode, SDNode VecNode, + X86FoldableSchedWrite sched, bit IsCommutable> { let ExeDomain = _.ExeDomain in { defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (VecNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>, - Sched<[itins.Sched]>; + Sched<[sched]>; defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, @@ -4816,13 +4804,13 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, (_.VT (VecNode _.RC:$src1, _.ScalarIntMemCPat:$src2, (i32 FROUND_CURRENT)))>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; let isCodeGenOnly = 1, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, - Sched<[itins.Sched]> { + Sched<[sched]> { let isCommutable = IsCommutable; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), @@ -4830,44 +4818,45 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } } multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, - SDNode VecNode, OpndItins itins, bit IsCommutable = 0> { + SDNode VecNode, X86FoldableSchedWrite sched, + bit IsCommutable = 0> { let ExeDomain = _.ExeDomain in defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr, "$rc, $src2, $src1", "$src1, $src2, $rc", (VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 imm:$rc)), IsCommutable>, - EVEX_B, EVEX_RC, Sched<[itins.Sched]>; + EVEX_B, EVEX_RC, Sched<[sched]>; } multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, SDNode OpNode, SDNode VecNode, SDNode SaeNode, - OpndItins itins, bit IsCommutable> { + X86FoldableSchedWrite sched, bit IsCommutable> { let ExeDomain = _.ExeDomain in { defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (VecNode _.RC:$src1, _.RC:$src2))>, - Sched<[itins.Sched]>; + Sched<[sched]>; defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (VecNode _.RC:$src1, _.ScalarIntMemCPat:$src2))>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; let isCodeGenOnly = 1, Predicates = [HasAVX512] in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, - Sched<[itins.Sched]> { + Sched<[sched]> { let isCommutable = IsCommutable; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), @@ -4875,7 +4864,7 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), @@ -4883,54 +4872,55 @@ multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, "{sae}, $src2, $src1", "$src1, $src2, {sae}", (SaeNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 FROUND_NO_EXC))>, EVEX_B, - Sched<[itins.Sched]>; + Sched<[sched]>; } } multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode VecNode, - SizeItins itins, bit IsCommutable> { + SDNode VecNode, X86FoldableSchedWrite sched, + bit IsCommutable> { defm SSZ : avx512_fp_scalar<opc, OpcodeStr#"ss", f32x_info, OpNode, VecNode, - itins.s, IsCommutable>, + sched, IsCommutable>, avx512_fp_scalar_round<opc, OpcodeStr#"ss", f32x_info, VecNode, - itins.s, IsCommutable>, + sched, IsCommutable>, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm SDZ : avx512_fp_scalar<opc, OpcodeStr#"sd", f64x_info, OpNode, VecNode, - itins.d, IsCommutable>, + sched, IsCommutable>, avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, VecNode, - itins.d, IsCommutable>, + sched, IsCommutable>, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode VecNode, SDNode SaeNode, - SizeItins itins, bit IsCommutable> { + SDNode VecNode, SDNode SaeNode, + X86FoldableSchedWrite sched, bit IsCommutable> { defm SSZ : avx512_fp_scalar_sae<opc, OpcodeStr#"ss", f32x_info, OpNode, - VecNode, SaeNode, itins.s, IsCommutable>, + VecNode, SaeNode, sched, IsCommutable>, XS, EVEX_4V, VEX_LIG, EVEX_CD8<32, CD8VT1>; defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode, - VecNode, SaeNode, itins.d, IsCommutable>, + VecNode, SaeNode, sched, IsCommutable>, XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>; } -defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, SSE_ALU_ITINS_S, 1>; -defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, SSE_MUL_ITINS_S, 1>; -defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, SSE_ALU_ITINS_S, 0>; -defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, SSE_DIV_ITINS_S, 0>; +defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnds, WriteFAdd, 1>; +defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnds, WriteFMul, 1>; +defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnds, WriteFAdd, 0>; +defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnds, WriteFDiv, 0>; defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fmins, X86fminRnds, - SSE_ALU_ITINS_S, 0>; + WriteFAdd, 0>; defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxs, X86fmaxRnds, - SSE_ALU_ITINS_S, 0>; + WriteFAdd, 0>; // MIN/MAX nodes are commutable under "unsafe-fp-math". In this case we use // X86fminc and X86fmaxc instead of X86fmin and X86fmax multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr, - X86VectorVTInfo _, SDNode OpNode, OpndItins itins> { + X86VectorVTInfo _, SDNode OpNode, + X86FoldableSchedWrite sched> { let isCodeGenOnly = 1, Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))]>, - Sched<[itins.Sched]> { + Sched<[sched]> { let isCommutable = 1; } def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst), @@ -4938,40 +4928,40 @@ multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr, OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set _.FRC:$dst, (OpNode _.FRC:$src1, (_.ScalarLdFrag addr:$src2)))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc, - SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG, - EVEX_CD8<32, CD8VT1>; + WriteFAdd>, XS, EVEX_4V, VEX_LIG, + EVEX_CD8<32, CD8VT1>; defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc, - SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG, - EVEX_CD8<64, CD8VT1>; + WriteFAdd>, XD, VEX_W, EVEX_4V, VEX_LIG, + EVEX_CD8<64, CD8VT1>; defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc, - SSE_ALU_ITINS_S.s>, XS, EVEX_4V, VEX_LIG, - EVEX_CD8<32, CD8VT1>; + WriteFAdd>, XS, EVEX_4V, VEX_LIG, + EVEX_CD8<32, CD8VT1>; defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc, - SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG, - EVEX_CD8<64, CD8VT1>; + WriteFAdd>, XD, VEX_W, EVEX_4V, VEX_LIG, + EVEX_CD8<64, CD8VT1>; multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, - X86VectorVTInfo _, OpndItins itins, + X86VectorVTInfo _, X86FoldableSchedWrite sched, bit IsCommutable> { let ExeDomain = _.ExeDomain, hasSideEffects = 0 in { defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, _.RC:$src2)), IsCommutable>, - EVEX_4V, Sched<[itins.Sched]>; + EVEX_4V, Sched<[sched]>; let mayLoad = 1 in { defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, - EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, "${src2}"##_.BroadcastStr##", $src1", @@ -4979,106 +4969,99 @@ multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpN (OpNode _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2))))>, EVEX_4V, EVEX_B, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } } -multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd, - OpndItins itins, X86VectorVTInfo _> { +multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, + SDPatternOperator OpNodeRnd, + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix, "$rc, $src2, $src1", "$src1, $src2, $rc", (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 imm:$rc)))>, - EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>; + EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; } -multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd, - OpndItins itins, X86VectorVTInfo _> { +multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, + SDPatternOperator OpNodeRnd, + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rrb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, "{sae}, $src2, $src1", "$src1, $src2, {sae}", (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src2, (i32 FROUND_NO_EXC)))>, - EVEX_4V, EVEX_B, Sched<[itins.Sched]>; + EVEX_4V, EVEX_B, Sched<[sched]>; } multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode, - Predicate prd, SizeItins itins, + Predicate prd, X86FoldableSchedWrite sched, bit IsCommutable = 0> { let Predicates = [prd] in { defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info, - itins.s, IsCommutable>, EVEX_V512, PS, + sched, IsCommutable>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info, - itins.d, IsCommutable>, EVEX_V512, PD, VEX_W, + sched, IsCommutable>, EVEX_V512, PD, VEX_W, EVEX_CD8<64, CD8VF>; } // Define only if AVX512VL feature is present. let Predicates = [prd, HasVLX] in { defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info, - itins.s, IsCommutable>, EVEX_V128, PS, + sched, IsCommutable>, EVEX_V128, PS, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info, - itins.s, IsCommutable>, EVEX_V256, PS, + sched, IsCommutable>, EVEX_V256, PS, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info, - itins.d, IsCommutable>, EVEX_V128, PD, VEX_W, + sched, IsCommutable>, EVEX_V128, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info, - itins.d, IsCommutable>, EVEX_V256, PD, VEX_W, + sched, IsCommutable>, EVEX_V256, PD, VEX_W, EVEX_CD8<64, CD8VF>; } } multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, - SizeItins itins> { - defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.s, v16f32_info>, + X86FoldableSchedWrite sched> { + defm PSZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched, v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, itins.d, v8f64_info>, + defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched, v8f64_info>, EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, - SizeItins itins> { - defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.s, v16f32_info>, + X86FoldableSchedWrite sched> { + defm PSZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched, v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, itins.d, v8f64_info>, + defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched, v8f64_info>, EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>; } defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512, - SSE_ALU_ITINS_P, 1>, - avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, SSE_ALU_ITINS_P>; -defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, - SSE_MUL_ITINS_P, 1>, - avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, SSE_MUL_ITINS_P>; -defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, SSE_ALU_ITINS_P>, - avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, SSE_ALU_ITINS_P>; -defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, SSE_DIV_ITINS_P>, - avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, SSE_DIV_ITINS_P>; -defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, - SSE_ALU_ITINS_P, 0>, - avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, SSE_ALU_ITINS_P>; -defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, - SSE_ALU_ITINS_P, 0>, - avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, SSE_ALU_ITINS_P>; + WriteFAdd, 1>, + avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd, WriteFAdd>; +defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, WriteFMul, 1>, + avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd, WriteFMul>; +defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, WriteFAdd>, + avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd, WriteFAdd>; +defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, WriteFDiv>, + avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd, WriteFDiv>; +defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, WriteFAdd, 0>, + avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd, WriteFAdd>; +defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, WriteFAdd, 0>, + avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd, WriteFAdd>; let isCodeGenOnly = 1 in { - defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, - SSE_ALU_ITINS_P, 1>; - defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, - SSE_ALU_ITINS_P, 1>; -} -defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, - SSE_ALU_ITINS_P, 1>; -defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, - SSE_ALU_ITINS_P, 0>; -defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, - SSE_ALU_ITINS_P, 1>; -defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, - SSE_ALU_ITINS_P, 1>; + defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, WriteFAdd, 1>; + defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, WriteFAdd, 1>; +} +defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI, WriteFAdd, 1>; +defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI, WriteFAdd, 0>; +defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI, WriteFAdd, 1>; +defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI, WriteFAdd, 1>; // Patterns catch floating point selects with bitcasted integer logic ops. multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode, @@ -5185,18 +5168,18 @@ let Predicates = [HasVLX,HasDQI] in { } multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>, - EVEX_4V, Sched<[itins.Sched]>; + EVEX_4V, Sched<[sched]>; defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode _.RC:$src1, (_.LdFrag addr:$src2), (i32 FROUND_CURRENT))>, - EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix, "${src2}"##_.BroadcastStr##", $src1", @@ -5204,50 +5187,50 @@ multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode, (OpNode _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2))), (i32 FROUND_CURRENT))>, - EVEX_4V, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr: AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, _.RC:$src2, (i32 FROUND_CURRENT)))>, - Sched<[itins.Sched]>; + Sched<[sched]>; defm rm: AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode _.RC:$src1, _.ScalarIntMemCPat:$src2, (i32 FROUND_CURRENT))>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> { - defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v16f32_info>, - avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v16f32_info>, + defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v16f32_info>, + avx512_fp_round_packed<opc, OpcodeStr, OpNode, WriteFAdd, v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v8f64_info>, - avx512_fp_round_packed<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v8f64_info>, + defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v8f64_info>, + avx512_fp_round_packed<opc, OpcodeStr, OpNode, WriteFAdd, v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, SSE_ALU_F32S, f32x_info>, - avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, SSE_ALU_ITINS_S.s>, + defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, WriteFAdd, f32x_info>, + avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, WriteFAdd>, EVEX_4V,EVEX_CD8<32, CD8VT1>; - defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, SSE_ALU_F64S, f64x_info>, - avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, SSE_ALU_ITINS_S.d>, + defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, WriteFAdd, f64x_info>, + avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, WriteFAdd>, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { - defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v4f32x_info>, + defm PSZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>; - defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F32P, v8f32x_info>, + defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>; - defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v2f64x_info>, + defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v2f64x_info>, EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; - defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, SSE_ALU_F64P, v4f64x_info>, + defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, WriteFAdd, v4f64x_info>, EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } @@ -5258,7 +5241,8 @@ defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs //===----------------------------------------------------------------------===// multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode, - OpndItins itins, X86VectorVTInfo _, string Suffix> { + X86FoldableSchedWrite sched, X86VectorVTInfo _, + string Suffix> { let ExeDomain = _.ExeDomain in { let isCommutable = 1 in defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst), @@ -5266,7 +5250,7 @@ multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode, "$src2, $src1", "$src1, $src2", (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))), _.ImmAllZerosV)>, - EVEX_4V, Sched<[itins.Sched]>; + EVEX_4V, Sched<[sched]>; defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", @@ -5275,7 +5259,7 @@ multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode, (bitconvert (_.LdFrag addr:$src2))))), _.ImmAllZerosV)>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } // Patterns for compare with 0 that just use the same source twice. @@ -5289,7 +5273,7 @@ multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode, } multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode, - OpndItins itins, X86VectorVTInfo _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, @@ -5300,7 +5284,7 @@ multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode, (_.ScalarLdFrag addr:$src2))), _.ImmAllZerosV)>, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } // Use 512bit version to implement 128/256 bit in case NoVLX. @@ -5349,17 +5333,17 @@ multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo, } multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode, - OpndItins itins, AVX512VLVectorVTInfo _, + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _, string Suffix> { let Predicates = [HasAVX512] in - defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512, Suffix>, - avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512; + defm Z : avx512_vptest<opc, OpcodeStr, OpNode, sched, _.info512, Suffix>, + avx512_vptest_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256, Suffix>, - avx512_vptest_mb<opc, OpcodeStr, OpNode,itins, _.info256>, EVEX_V256; - defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128, Suffix>, - avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128; + defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, sched, _.info256, Suffix>, + avx512_vptest_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; + defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, sched, _.info128, Suffix>, + avx512_vptest_mb<opc, OpcodeStr, OpNode, sched, _.info128>, EVEX_V128; } let Predicates = [HasAVX512, NoVLX] in { defm Z256_Alt : avx512_vptest_lowering< OpNode, _.info512, _.info256, Suffix>; @@ -5368,30 +5352,30 @@ multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode, } multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode, - OpndItins itins> { - defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, itins, + X86FoldableSchedWrite sched> { + defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, sched, avx512vl_i32_info, "D">; - defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, itins, + defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", OpNode, sched, avx512vl_i64_info, "Q">, VEX_W; } multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr, - PatFrag OpNode, OpndItins itins> { + PatFrag OpNode, X86FoldableSchedWrite sched> { let Predicates = [HasBWI] in { - defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info, "W">, + defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched, v32i16_info, "W">, EVEX_V512, VEX_W; - defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info, "B">, + defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched, v64i8_info, "B">, EVEX_V512; } let Predicates = [HasVLX, HasBWI] in { - defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v16i16x_info, "W">, + defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched, v16i16x_info, "W">, EVEX_V256, VEX_W; - defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v8i16x_info, "W">, + defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, sched, v8i16x_info, "W">, EVEX_V128, VEX_W; - defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v32i8x_info, "B">, + defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched, v32i8x_info, "B">, EVEX_V256; - defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v16i8x_info, "B">, + defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, sched, v16i8x_info, "B">, EVEX_V128; } @@ -5412,79 +5396,80 @@ def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2), (X86cmpm node:$src1, node:$src2, (i8 4))>; multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr, - PatFrag OpNode, OpndItins itins> : - avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, itins>, - avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, itins>; + PatFrag OpNode, X86FoldableSchedWrite sched> : + avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, sched>, + avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, sched>; defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem, - SSE_BIT_ITINS_P>, T8PD; + WriteVecLogic>, T8PD; defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm, - SSE_BIT_ITINS_P>, T8XS; + WriteVecLogic>, T8XS; //===----------------------------------------------------------------------===// // AVX-512 Shift instructions //===----------------------------------------------------------------------===// multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM, - string OpcodeStr, SDNode OpNode, OpndItins itins, - X86VectorVTInfo _> { + string OpcodeStr, SDNode OpNode, + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (i8 imm:$src2)))>, - Sched<[itins.Sched]>; + Sched<[sched]>; defm mi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i8 imm:$src2)))>, - Sched<[itins.Sched.Folded]>; + Sched<[sched.Folded]>; } } multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, - string OpcodeStr, SDNode OpNode, OpndItins itins, - X86VectorVTInfo _> { + string OpcodeStr, SDNode OpNode, + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr, "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2", (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1)), (i8 imm:$src2)))>, - EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, ValueType SrcVT, PatFrag bc_frag, - X86VectorVTInfo _> { + X86FoldableSchedWrite sched, ValueType SrcVT, + PatFrag bc_frag, X86VectorVTInfo _> { // src2 is always 128-bit let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, VR128X:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (SrcVT VR128X:$src2)))>, - AVX512BIBase, EVEX_4V, Sched<[itins.Sched]>; + AVX512BIBase, EVEX_4V, Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, i128mem:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2))))>, AVX512BIBase, - EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, ValueType SrcVT, PatFrag bc_frag, - AVX512VLVectorVTInfo VTInfo, Predicate prd> { + X86FoldableSchedWrite sched, ValueType SrcVT, + PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo, + Predicate prd> { let Predicates = [prd] in - defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag, + defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched, SrcVT, bc_frag, VTInfo.info512>, EVEX_V512, EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag, + defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched, SrcVT, bc_frag, VTInfo.info256>, EVEX_V256, EVEX_CD8<VTInfo.info256.EltSize, CD8VH>; - defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, itins, SrcVT, bc_frag, + defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched, SrcVT, bc_frag, VTInfo.info128>, EVEX_V128, EVEX_CD8<VTInfo.info128.EltSize, CD8VF>; } @@ -5492,82 +5477,83 @@ multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw, string OpcodeStr, SDNode OpNode, - OpndItins itins> { - defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, itins, v4i32, + X86FoldableSchedWrite sched> { + defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32, bc_v4i32, avx512vl_i32_info, HasAVX512>; - defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, itins, v2i64, + defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64, bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W; - defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, itins, v8i16, + defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16, bc_v2i64, avx512vl_i16_info, HasBWI>; } multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode, - OpndItins itins, AVX512VLVectorVTInfo VTInfo> { + X86FoldableSchedWrite sched, + AVX512VLVectorVTInfo VTInfo> { let Predicates = [HasAVX512] in - defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins, + defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, sched, VTInfo.info512>, - avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins, - VTInfo.info512>, EVEX_V512; + avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched, + VTInfo.info512>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, itins, + defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, sched, VTInfo.info256>, - avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins, - VTInfo.info256>, EVEX_V256; - defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, - itins, VTInfo.info128>, - avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, itins, - VTInfo.info128>, EVEX_V128; + avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched, + VTInfo.info256>, EVEX_V256; + defm Z128: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, sched, + VTInfo.info128>, + avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, sched, + VTInfo.info128>, EVEX_V128; } } -multiclass avx512_shift_rmi_w<bits<8> opcw, - Format ImmFormR, Format ImmFormM, - string OpcodeStr, SDNode OpNode, - OpndItins itins> { +multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM, + string OpcodeStr, SDNode OpNode, + X86FoldableSchedWrite sched> { let Predicates = [HasBWI] in defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, - itins, v32i16_info>, EVEX_V512, VEX_WIG; + sched, v32i16_info>, EVEX_V512, VEX_WIG; let Predicates = [HasVLX, HasBWI] in { defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, - itins, v16i16x_info>, EVEX_V256, VEX_WIG; + sched, v16i16x_info>, EVEX_V256, VEX_WIG; defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode, - itins, v8i16x_info>, EVEX_V128, VEX_WIG; + sched, v8i16x_info>, EVEX_V128, VEX_WIG; } } multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq, - Format ImmFormR, Format ImmFormM, - string OpcodeStr, SDNode OpNode, OpndItins itins> { + Format ImmFormR, Format ImmFormM, + string OpcodeStr, SDNode OpNode, + X86FoldableSchedWrite sched> { defm D: avx512_shift_rmi_sizes<opcd, ImmFormR, ImmFormM, OpcodeStr#"d", OpNode, - itins, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; + sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode, - itins, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; + sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W; } defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli, - SSE_INTSHIFT_P>, + WriteVecShift>, avx512_shift_rmi_w<0x71, MRM2r, MRM2m, "vpsrlw", X86vsrli, - SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V; + WriteVecShift>, AVX512BIi8Base, EVEX_4V; defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli, - SSE_INTSHIFT_P>, + WriteVecShift>, avx512_shift_rmi_w<0x71, MRM6r, MRM6m, "vpsllw", X86vshli, - SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V; + WriteVecShift>, AVX512BIi8Base, EVEX_4V; defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai, - SSE_INTSHIFT_P>, + WriteVecShift>, avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai, - SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V; + WriteVecShift>, AVX512BIi8Base, EVEX_4V; defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri, - SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V; + WriteVecShift>, AVX512BIi8Base, EVEX_4V; defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli, - SSE_INTSHIFT_P>, AVX512BIi8Base, EVEX_4V; + WriteVecShift>, AVX512BIi8Base, EVEX_4V; -defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, SSE_INTSHIFT_P>; -defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, SSE_INTSHIFT_P>; -defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, SSE_INTSHIFT_P>; +defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl, WriteVecShift>; +defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra, WriteVecShift>; +defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl, WriteVecShift>; // Use 512bit VPSRA/VPSRAI version to implement v2i64/v4i64 in case NoVLX. let Predicates = [HasAVX512, NoVLX] in { @@ -5600,25 +5586,25 @@ let Predicates = [HasAVX512, NoVLX] in { // Variable Bit Shifts //===-------------------------------------------------------------------===// multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (_.VT _.RC:$src2)))>, - AVX5128IBase, EVEX_4V, Sched<[itins.Sched]>; + AVX5128IBase, EVEX_4V, Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src2)))))>, AVX5128IBase, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, @@ -5627,28 +5613,28 @@ multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.VT (OpNode _.RC:$src1, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src2)))))>, AVX5128IBase, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_var_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, AVX512VLVectorVTInfo _> { + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in - defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>, - avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512; + defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, + avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>, - avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256; - defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>, - avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128; + defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, + avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; + defm Z128 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>, + avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info128>, EVEX_V128; } } multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr, - SDNode OpNode, OpndItins itins> { - defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, itins, + SDNode OpNode, X86FoldableSchedWrite sched> { + defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched, avx512vl_i32_info>; - defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, itins, + defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched, avx512vl_i64_info>, VEX_W; } @@ -5674,35 +5660,30 @@ multiclass avx512_var_shift_lowering<AVX512VLVectorVTInfo _, string OpcodeStr, } } multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr, - SDNode OpNode, OpndItins itins> { + SDNode OpNode, X86FoldableSchedWrite sched> { let Predicates = [HasBWI] in - defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i16_info>, + defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched, v32i16_info>, EVEX_V512, VEX_W; let Predicates = [HasVLX, HasBWI] in { - defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i16x_info>, + defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, v16i16x_info>, EVEX_V256, VEX_W; - defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v8i16x_info>, + defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, v8i16x_info>, EVEX_V128, VEX_W; } } -let Sched = WriteVarVecShift in -def AVX512_VARSHIFT_P : OpndItins< - NoItinerary, NoItinerary ->; - -defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, AVX512_VARSHIFT_P>, - avx512_var_shift_w<0x12, "vpsllvw", shl, AVX512_VARSHIFT_P>; +defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl, WriteVarVecShift>, + avx512_var_shift_w<0x12, "vpsllvw", shl, WriteVarVecShift>; -defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, AVX512_VARSHIFT_P>, - avx512_var_shift_w<0x11, "vpsravw", sra, AVX512_VARSHIFT_P>; +defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra, WriteVarVecShift>, + avx512_var_shift_w<0x11, "vpsravw", sra, WriteVarVecShift>; -defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, AVX512_VARSHIFT_P>, - avx512_var_shift_w<0x10, "vpsrlvw", srl, AVX512_VARSHIFT_P>; +defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl, WriteVarVecShift>, + avx512_var_shift_w<0x10, "vpsrlvw", srl, WriteVarVecShift>; -defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, AVX512_VARSHIFT_P>; -defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, AVX512_VARSHIFT_P>; +defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr, WriteVarVecShift>; +defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl, WriteVarVecShift>; defm : avx512_var_shift_lowering<avx512vl_i64_info, "VPSRAVQ", sra, [HasAVX512, NoVLX]>; defm : avx512_var_shift_lowering<avx512vl_i16_info, "VPSLLVW", shl, [HasBWI, NoVLX]>; @@ -5880,78 +5861,78 @@ let Predicates = [HasAVX512, NoVLX] in { // 1-src variable permutation VPERMW/D/Q //===-------------------------------------------------------------------===// multiclass avx512_vperm_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, AVX512VLVectorVTInfo _> { + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in - defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>, - avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512; + defm Z : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, + avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in - defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>, - avx512_var_shift_mb<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256; + defm Z256 : avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, + avx512_var_shift_mb<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; } multiclass avx512_vpermi_dq_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, string OpcodeStr, SDNode OpNode, - OpndItins itins, AVX512VLVectorVTInfo VTInfo> { + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> { let Predicates = [HasAVX512] in defm Z: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, - itins, VTInfo.info512>, + sched, VTInfo.info512>, avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, - itins, VTInfo.info512>, EVEX_V512; + sched, VTInfo.info512>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in defm Z256: avx512_shift_rmi<opc, ImmFormR, ImmFormM, OpcodeStr, OpNode, - itins, VTInfo.info256>, + sched, VTInfo.info256>, avx512_shift_rmbi<opc, ImmFormM, OpcodeStr, OpNode, - itins, VTInfo.info256>, EVEX_V256; + sched, VTInfo.info256>, EVEX_V256; } multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr, Predicate prd, SDNode OpNode, - OpndItins itins, AVX512VLVectorVTInfo _> { + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { let Predicates = [prd] in - defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info512>, + defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512 ; let Predicates = [HasVLX, prd] in { - defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info256>, + defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256 ; - defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, _.info128>, + defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, _.info128>, EVEX_V128 ; } } defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv, - AVX2_PERMV_I, avx512vl_i16_info>, VEX_W; + WriteVarShuffle256, avx512vl_i16_info>, VEX_W; defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv, - AVX2_PERMV_I, avx512vl_i8_info>; + WriteVarShuffle256, avx512vl_i8_info>; defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv, - AVX2_PERMV_I, avx512vl_i32_info>; + WriteVarShuffle256, avx512vl_i32_info>; defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv, - AVX2_PERMV_I, avx512vl_i64_info>, VEX_W; + WriteVarShuffle256, avx512vl_i64_info>, VEX_W; defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv, - AVX2_PERMV_F, avx512vl_f32_info>; + WriteFVarShuffle256, avx512vl_f32_info>; defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv, - AVX2_PERMV_F, avx512vl_f64_info>, VEX_W; + WriteFVarShuffle256, avx512vl_f64_info>, VEX_W; defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", - X86VPermi, AVX2_PERMV_I, avx512vl_i64_info>, + X86VPermi, WriteVarShuffle256, avx512vl_i64_info>, EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", - X86VPermi, AVX2_PERMV_F, avx512vl_f64_info>, + X86VPermi, WriteFVarShuffle256, avx512vl_f64_info>, EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; //===----------------------------------------------------------------------===// // AVX-512 - VPERMIL //===----------------------------------------------------------------------===// multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _, + X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo Ctrl> { defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, Ctrl.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode _.RC:$src1, (Ctrl.VT Ctrl.RC:$src2)))>, - T8PD, EVEX_4V, Sched<[itins.Sched]>; + T8PD, EVEX_4V, Sched<[sched]>; defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", @@ -5959,7 +5940,7 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, _.RC:$src1, (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>, T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, "${src2}"##_.BroadcastStr##", $src1", @@ -5969,29 +5950,29 @@ multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, (Ctrl.VT (X86VBroadcast (Ctrl.ScalarLdFrag addr:$src2)))))>, T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar, - OpndItins itins, AVX512VLVectorVTInfo _, + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl> { let Predicates = [HasAVX512] in { - defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins, + defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched, _.info512, Ctrl.info512>, EVEX_V512; } let Predicates = [HasAVX512, HasVLX] in { - defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins, + defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched, _.info128, Ctrl.info128>, EVEX_V128; - defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, itins, + defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, sched, _.info256, Ctrl.info256>, EVEX_V256; } } multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar, AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ - defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, AVX_VPERMILV, _, Ctrl>; + defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, WriteFVarShuffle, _, Ctrl>; defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr, - X86VPermilpi, AVX_VPERMILV, _>, + X86VPermilpi, WriteFVarShuffle, _>, EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; } @@ -6007,25 +5988,25 @@ defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info, //===----------------------------------------------------------------------===// defm VPSHUFD : avx512_shift_rmi_sizes<0x70, MRMSrcReg, MRMSrcMem, "vpshufd", - X86PShufd, SSE_PSHUF, avx512vl_i32_info>, + X86PShufd, WriteShuffle, avx512vl_i32_info>, EVEX, AVX512BIi8Base, EVEX_CD8<32, CD8VF>; defm VPSHUFH : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshufhw", - X86PShufhw, SSE_PSHUF>, EVEX, AVX512XSIi8Base; + X86PShufhw, WriteShuffle>, EVEX, AVX512XSIi8Base; defm VPSHUFL : avx512_shift_rmi_w<0x70, MRMSrcReg, MRMSrcMem, "vpshuflw", - X86PShuflw, SSE_PSHUF>, EVEX, AVX512XDIi8Base; + X86PShuflw, WriteShuffle>, EVEX, AVX512XDIi8Base; multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins> { + X86FoldableSchedWrite sched> { let Predicates = [HasBWI] in - defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v64i8_info>, EVEX_V512; + defm Z: avx512_var_shift<opc, OpcodeStr, OpNode, sched, v64i8_info>, EVEX_V512; let Predicates = [HasVLX, HasBWI] in { - defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v32i8x_info>, EVEX_V256; - defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, itins, v16i8x_info>, EVEX_V128; + defm Z256: avx512_var_shift<opc, OpcodeStr, OpNode, sched, v32i8x_info>, EVEX_V256; + defm Z128: avx512_var_shift<opc, OpcodeStr, OpNode, sched, v16i8x_info>, EVEX_V128; } } -defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, SSE_PSHUFB>, VEX_WIG; +defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb, WriteVarShuffle>, VEX_WIG; //===----------------------------------------------------------------------===// // Move Low to High and High to Low packed FP Instructions @@ -6455,7 +6436,7 @@ defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86Fnmsubs1, //===----------------------------------------------------------------------===// let Constraints = "$src1 = $dst" in { multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> { // NOTE: The SDNode have the multiply operands first with the add last. // This enables commuted load patterns to be autogenerated by tablegen. let ExeDomain = _.ExeDomain in { @@ -6463,13 +6444,13 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins _.RC:$src2, _.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>, - AVX512FMA3Base, Sched<[itins.Sched]>; + AVX512FMA3Base, Sched<[sched]>; defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>, - AVX512FMA3Base, Sched<[itins.Sched.Folded, ReadAfterLd]>; + AVX512FMA3Base, Sched<[sched.Folded, ReadAfterLd]>; defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3), @@ -6478,47 +6459,47 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, (OpNode _.RC:$src2, (_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))), _.RC:$src1)>, - AVX512FMA3Base, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; + AVX512FMA3Base, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } } // Constraints = "$src1 = $dst" multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, AVX512VLVectorVTInfo _> { + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { let Predicates = [HasIFMA] in { - defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info512>, + defm Z : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>; } let Predicates = [HasVLX, HasIFMA] in { - defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info256>, + defm Z256 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>; - defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, itins, _.info128>, + defm Z128 : avx512_pmadd52_rm<opc, OpcodeStr, OpNode, sched, _.info128>, EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>; } } defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l, - SSE_PMADD, avx512vl_i64_info>, VEX_W; + WriteVecIMul, avx512vl_i64_info>, VEX_W; defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h, - SSE_PMADD, avx512vl_i64_info>, VEX_W; + WriteVecIMul, avx512vl_i64_info>, VEX_W; //===----------------------------------------------------------------------===// // AVX-512 Scalar convert from sign integer to float/double //===----------------------------------------------------------------------===// -multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, OpndItins itins, +multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched, RegisterClass SrcRC, X86VectorVTInfo DstVT, X86MemOperand x86memop, PatFrag ld_frag, string asm> { let hasSideEffects = 0 in { def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst), (ins DstVT.FRC:$src1, SrcRC:$src), !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, - EVEX_4V, Sched<[itins.Sched]>; + EVEX_4V, Sched<[sched]>; let mayLoad = 1 in def rm : SI<opc, MRMSrcMem, (outs DstVT.FRC:$dst), (ins DstVT.FRC:$src1, x86memop:$src), !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, - EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; } // hasSideEffects = 0 let isCodeGenOnly = 1 in { def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), @@ -6528,7 +6509,7 @@ multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, OpndItins itins, (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2, (i32 FROUND_CURRENT)))]>, - EVEX_4V, Sched<[itins.Sched]>; + EVEX_4V, Sched<[sched]>; def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins DstVT.RC:$src1, x86memop:$src2), @@ -6537,12 +6518,13 @@ multiclass avx512_vcvtsi<bits<8> opc, SDNode OpNode, OpndItins itins, (OpNode (DstVT.VT DstVT.RC:$src1), (ld_frag addr:$src2), (i32 FROUND_CURRENT)))]>, - EVEX_4V, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, Sched<[sched.Folded, ReadAfterLd]>; }//isCodeGenOnly = 1 } -multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, OpndItins itins, - RegisterClass SrcRC, X86VectorVTInfo DstVT, string asm> { +multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, + X86FoldableSchedWrite sched, RegisterClass SrcRC, + X86VectorVTInfo DstVT, string asm> { def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc), !strconcat(asm, @@ -6551,28 +6533,29 @@ multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode, OpndItins itins, (OpNode (DstVT.VT DstVT.RC:$src1), SrcRC:$src2, (i32 imm:$rc)))]>, - EVEX_4V, EVEX_B, EVEX_RC, Sched<[itins.Sched]>; + EVEX_4V, EVEX_B, EVEX_RC, Sched<[sched]>; } -multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, OpndItins itins, - RegisterClass SrcRC, X86VectorVTInfo DstVT, - X86MemOperand x86memop, PatFrag ld_frag, string asm> { - defm NAME : avx512_vcvtsi_round<opc, OpNode, itins, SrcRC, DstVT, asm>, - avx512_vcvtsi<opc, OpNode, itins, SrcRC, DstVT, x86memop, +multiclass avx512_vcvtsi_common<bits<8> opc, SDNode OpNode, + X86FoldableSchedWrite sched, + RegisterClass SrcRC, X86VectorVTInfo DstVT, + X86MemOperand x86memop, PatFrag ld_frag, string asm> { + defm NAME : avx512_vcvtsi_round<opc, OpNode, sched, SrcRC, DstVT, asm>, + avx512_vcvtsi<opc, OpNode, sched, SrcRC, DstVT, x86memop, ld_frag, asm>, VEX_LIG; } let Predicates = [HasAVX512] in { -defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR32, +defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2F, GR32, v4f32x_info, i32mem, loadi32, "cvtsi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SS, GR64, +defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2F, GR64, v4f32x_info, i64mem, loadi64, "cvtsi2ss{q}">, XS, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR32, +defm VCVTSI2SDZ : avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2F, GR32, v2f64x_info, i32mem, loadi32, "cvtsi2sd{l}">, XD, EVEX_CD8<32, CD8VT1>; -defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, SSE_CVT_SI2SD, GR64, +defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, WriteCvtI2F, GR64, v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; @@ -6599,16 +6582,16 @@ def : Pat<(f64 (sint_to_fp GR32:$src)), def : Pat<(f64 (sint_to_fp GR64:$src)), (VCVTSI642SDZrr (f64 (IMPLICIT_DEF)), GR64:$src)>; -defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR32, +defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2F, GR32, v4f32x_info, i32mem, loadi32, "cvtusi2ss{l}">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SS, GR64, +defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2F, GR64, v4f32x_info, i64mem, loadi64, "cvtusi2ss{q}">, XS, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR32, v2f64x_info, +defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, X86UintToFpRnd, WriteCvtI2F, GR32, v2f64x_info, i32mem, loadi32, "cvtusi2sd{l}">, XD, VEX_LIG, EVEX_CD8<32, CD8VT1>; -defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, SSE_CVT_SI2SD, GR64, +defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, WriteCvtI2F, GR64, v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; @@ -6642,26 +6625,26 @@ def : Pat<(f64 (uint_to_fp GR64:$src)), multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT, X86VectorVTInfo DstVT, SDNode OpNode, - OpndItins itins, string asm, + X86FoldableSchedWrite sched, string asm, string aliasStr, bit CodeGenOnly = 1> { let Predicates = [HasAVX512] in { def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src), !strconcat(asm,"\t{$src, $dst|$dst, $src}"), [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>, - EVEX, VEX_LIG, Sched<[itins.Sched]>; + EVEX, VEX_LIG, Sched<[sched]>; def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc), !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>, EVEX, VEX_LIG, EVEX_B, EVEX_RC, - Sched<[itins.Sched]>; + Sched<[sched]>; let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in def rm_Int : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.IntScalarMemOp:$src), !strconcat(asm,"\t{$src, $dst|$dst, $src}"), [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.ScalarIntMemCPat:$src), (i32 FROUND_CURRENT)))]>, - EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>; def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "rr_Int") DstVT.RC:$dst, SrcVT.RC:$src), 0>; @@ -6672,9 +6655,9 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT, multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT, X86VectorVTInfo DstVT, SDNode OpNode, - OpndItins itins, string asm, + X86FoldableSchedWrite sched, string asm, string aliasStr> : - avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, itins, asm, aliasStr, 0> { + avx512_cvt_s_int_round<opc, SrcVT, DstVT, OpNode, sched, asm, aliasStr, 0> { let Predicates = [HasAVX512] in { def : InstAlias<"v" # asm # aliasStr # "\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "rm_Int") DstVT.RC:$dst, @@ -6684,28 +6667,28 @@ multiclass avx512_cvt_s_int_round_aliases<bits<8> opc, X86VectorVTInfo SrcVT, // Convert float/double to signed/unsigned int 32/64 defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info, - X86cvts2si, SSE_CVT_SS2SI_32, "cvtss2si", "{l}">, + X86cvts2si, WriteCvtF2I, "cvtss2si", "{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, - X86cvts2si, SSE_CVT_SS2SI_64, "cvtss2si", "{q}">, + X86cvts2si, WriteCvtF2I, "cvtss2si", "{q}">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTSS2USIZ: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i32x_info, - X86cvts2usi, SSE_CVT_SS2SI_32, "cvtss2usi", "{l}">, + X86cvts2usi, WriteCvtF2I, "cvtss2usi", "{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTSS2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f32x_info, i64x_info, - X86cvts2usi, SSE_CVT_SS2SI_64, "cvtss2usi", "{q}">, + X86cvts2usi, WriteCvtF2I, "cvtss2usi", "{q}">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, - X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si", "{l}">, + X86cvts2si, WriteCvtF2I, "cvtsd2si", "{l}">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, - X86cvts2si, SSE_CVT_SD2SI, "cvtsd2si", "{q}">, + X86cvts2si, WriteCvtF2I, "cvtsd2si", "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; defm VCVTSD2USIZ: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i32x_info, - X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi", "{l}">, + X86cvts2usi, WriteCvtF2I, "cvtsd2usi", "{l}">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTSD2USI64Z: avx512_cvt_s_int_round_aliases<0x79, f64x_info, i64x_info, - X86cvts2usi, SSE_CVT_SD2SI, "cvtsd2usi", "{q}">, + X86cvts2usi, WriteCvtF2I, "cvtsd2usi", "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; // The SSE version of these instructions are disabled for AVX512. @@ -6779,30 +6762,30 @@ def : Pat<(v2f64 (X86Movsd // Convert float/double to signed/unsigned int 32/64 with truncation multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC, X86VectorVTInfo _DstRC, SDNode OpNode, - SDNode OpNodeRnd, OpndItins itins, string aliasStr, - bit CodeGenOnly = 1>{ + SDNode OpNodeRnd, X86FoldableSchedWrite sched, + string aliasStr, bit CodeGenOnly = 1>{ let Predicates = [HasAVX512] in { let isCodeGenOnly = 1 in { def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src), !strconcat(asm,"\t{$src, $dst|$dst, $src}"), [(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>, - EVEX, Sched<[itins.Sched]>; + EVEX, Sched<[sched]>; def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src), !strconcat(asm,"\t{$src, $dst|$dst, $src}"), [(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>, - EVEX, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX, Sched<[sched.Folded, ReadAfterLd]>; } def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src), !strconcat(asm,"\t{$src, $dst|$dst, $src}"), [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src), (i32 FROUND_CURRENT)))]>, - EVEX, VEX_LIG, Sched<[itins.Sched]>; + EVEX, VEX_LIG, Sched<[sched]>; def rrb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src), !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"), [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src), (i32 FROUND_NO_EXC)))]>, - EVEX,VEX_LIG , EVEX_B, Sched<[itins.Sched]>; + EVEX,VEX_LIG , EVEX_B, Sched<[sched]>; let isCodeGenOnly = CodeGenOnly, ForceDisassemble = CodeGenOnly in def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.IntScalarMemOp:$src), @@ -6810,7 +6793,7 @@ let Predicates = [HasAVX512] in { [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.ScalarIntMemCPat:$src), (i32 FROUND_CURRENT)))]>, - EVEX, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>; def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "rr_Int") _DstRC.RC:$dst, _SrcRC.RC:$src), 0>; @@ -6822,9 +6805,9 @@ let Predicates = [HasAVX512] in { multiclass avx512_cvt_s_all_unsigned<bits<8> opc, string asm, X86VectorVTInfo _SrcRC, X86VectorVTInfo _DstRC, SDNode OpNode, - SDNode OpNodeRnd, OpndItins itins, + SDNode OpNodeRnd, X86FoldableSchedWrite sched, string aliasStr> : - avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, itins, + avx512_cvt_s_all<opc, asm, _SrcRC, _DstRC, OpNode, OpNodeRnd, sched, aliasStr, 0> { let Predicates = [HasAVX512] in { def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}", @@ -6834,29 +6817,29 @@ let Predicates = [HasAVX512] in { } defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info, - fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_32, "{l}">, + fp_to_sint, X86cvtts2IntRnd, WriteCvtF2I, "{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info, - fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SS2SI_64, "{q}">, + fp_to_sint, X86cvtts2IntRnd, WriteCvtF2I, "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info, - fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{l}">, + fp_to_sint, X86cvtts2IntRnd, WriteCvtF2I, "{l}">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info, - fp_to_sint, X86cvtts2IntRnd, SSE_CVT_SD2SI, "{q}">, + fp_to_sint, X86cvtts2IntRnd, WriteCvtF2I, "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSS2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i32x_info, - fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_32, "{l}">, + fp_to_uint, X86cvtts2UIntRnd, WriteCvtF2I, "{l}">, XS, EVEX_CD8<32, CD8VT1>; defm VCVTTSS2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttss2usi", f32x_info, i64x_info, - fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SS2SI_64, "{q}">, + fp_to_uint, X86cvtts2UIntRnd, WriteCvtF2I, "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>; defm VCVTTSD2USIZ: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i32x_info, - fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{l}">, + fp_to_uint, X86cvtts2UIntRnd, WriteCvtF2I, "{l}">, XD, EVEX_CD8<64, CD8VT1>; defm VCVTTSD2USI64Z: avx512_cvt_s_all_unsigned<0x78, "vcvttsd2usi", f64x_info, i64x_info, - fp_to_uint, X86cvtts2UIntRnd, SSE_CVT_SD2SI, "{q}">, + fp_to_uint, X86cvtts2UIntRnd, WriteCvtF2I, "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; let Predicates = [HasAVX512] in { @@ -6883,14 +6866,15 @@ let Predicates = [HasAVX512] in { //===----------------------------------------------------------------------===// multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, - X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins> { + X86VectorVTInfo _Src, SDNode OpNode, + X86FoldableSchedWrite sched> { defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (_.VT (OpNode (_.VT _.RC:$src1), (_Src.VT _Src.RC:$src2), (i32 FROUND_CURRENT)))>, - EVEX_4V, VEX_LIG, Sched<[itins.Sched]>; + EVEX_4V, VEX_LIG, Sched<[sched]>; defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _Src.IntScalarMemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", @@ -6898,68 +6882,70 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _ (_Src.VT _Src.ScalarIntMemCPat:$src2), (i32 FROUND_CURRENT)))>, EVEX_4V, VEX_LIG, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; let isCodeGenOnly = 1, hasSideEffects = 0 in { def rr : I<opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _Src.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - EVEX_4V, VEX_LIG, Sched<[itins.Sched]>; + EVEX_4V, VEX_LIG, Sched<[sched]>; let mayLoad = 1 in def rm : I<opc, MRMSrcMem, (outs _.FRC:$dst), (ins _.FRC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - EVEX_4V, VEX_LIG, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_4V, VEX_LIG, Sched<[sched.Folded, ReadAfterLd]>; } } // Scalar Coversion with SAE - suppress all exceptions multiclass avx512_cvt_fp_sae_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, - X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> { + X86VectorVTInfo _Src, SDNode OpNodeRnd, + X86FoldableSchedWrite sched> { defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _Src.RC:$src2), OpcodeStr, "{sae}, $src2, $src1", "$src1, $src2, {sae}", (_.VT (OpNodeRnd (_.VT _.RC:$src1), (_Src.VT _Src.RC:$src2), (i32 FROUND_NO_EXC)))>, - EVEX_4V, VEX_LIG, EVEX_B, Sched<[itins.Sched]>; + EVEX_4V, VEX_LIG, EVEX_B, Sched<[sched]>; } // Scalar Conversion with rounding control (RC) multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, - X86VectorVTInfo _Src, SDNode OpNodeRnd, OpndItins itins> { + X86VectorVTInfo _Src, SDNode OpNodeRnd, + X86FoldableSchedWrite sched> { defm rrb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _Src.RC:$src2, AVX512RC:$rc), OpcodeStr, "$rc, $src2, $src1", "$src1, $src2, $rc", (_.VT (OpNodeRnd (_.VT _.RC:$src1), (_Src.VT _Src.RC:$src2), (i32 imm:$rc)))>, - EVEX_4V, VEX_LIG, Sched<[itins.Sched]>, + EVEX_4V, VEX_LIG, Sched<[sched]>, EVEX_B, EVEX_RC; } multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr, - SDNode OpNodeRnd, OpndItins itins, + SDNode OpNodeRnd, X86FoldableSchedWrite sched, X86VectorVTInfo _src, X86VectorVTInfo _dst> { let Predicates = [HasAVX512] in { - defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>, + defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>, avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src, - OpNodeRnd, itins>, VEX_W, EVEX_CD8<64, CD8VT1>, XD; + OpNodeRnd, sched>, VEX_W, EVEX_CD8<64, CD8VT1>, XD; } } -multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, - SDNode OpNodeRnd, OpndItins itins, - X86VectorVTInfo _src, X86VectorVTInfo _dst> { +multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd, + X86FoldableSchedWrite sched, + X86VectorVTInfo _src, X86VectorVTInfo _dst> { let Predicates = [HasAVX512] in { - defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>, - avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, itins>, + defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>, + avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd, sched>, EVEX_CD8<32, CD8VT1>, XS; } } defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", - X86froundRnd, SSE_CVT_SD2SS, f64x_info, + X86froundRnd, WriteCvtF2F, f64x_info, f32x_info>, NotMemoryFoldable; defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", - X86fpextRnd, SSE_CVT_SS2SD, f32x_info, + X86fpextRnd, WriteCvtF2F, f32x_info, f64x_info>, NotMemoryFoldable; def : Pat<(f64 (fpextend FR32X:$src)), @@ -7001,20 +6987,21 @@ def : Pat<(v2f64 (X86Movsd //===----------------------------------------------------------------------===// multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, - X86VectorVTInfo _Src, SDNode OpNode, OpndItins itins, - string Broadcast = _.BroadcastStr, - string Alias = "", X86MemOperand MemOp = _Src.MemOp> { + X86VectorVTInfo _Src, SDNode OpNode, + X86FoldableSchedWrite sched, + string Broadcast = _.BroadcastStr, + string Alias = "", X86MemOperand MemOp = _Src.MemOp> { defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _Src.RC:$src), OpcodeStr, "$src", "$src", (_.VT (OpNode (_Src.VT _Src.RC:$src)))>, - EVEX, Sched<[itins.Sched]>; + EVEX, Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src", (_.VT (OpNode (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>, - EVEX, Sched<[itins.Sched.Folded]>; + EVEX, Sched<[sched.Folded]>; defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _Src.ScalarMemOp:$src), OpcodeStr, @@ -7022,60 +7009,60 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, (_.VT (OpNode (_Src.VT (X86VBroadcast (_Src.ScalarLdFrag addr:$src))) ))>, EVEX, EVEX_B, - Sched<[itins.Sched.Folded]>; + Sched<[sched.Folded]>; } // Coversion with SAE - suppress all exceptions multiclass avx512_vcvt_fp_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, X86VectorVTInfo _Src, SDNode OpNodeRnd, - OpndItins itins> { + X86FoldableSchedWrite sched> { defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _Src.RC:$src), OpcodeStr, "{sae}, $src", "$src, {sae}", (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 FROUND_NO_EXC)))>, - EVEX, EVEX_B, Sched<[itins.Sched]>; + EVEX, EVEX_B, Sched<[sched]>; } // Conversion with rounding control (RC) multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, X86VectorVTInfo _Src, SDNode OpNodeRnd, - OpndItins itins> { + X86FoldableSchedWrite sched> { defm rrb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _Src.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc", (_.VT (OpNodeRnd (_Src.VT _Src.RC:$src), (i32 imm:$rc)))>, - EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>; + EVEX, EVEX_B, EVEX_RC, Sched<[sched]>; } // Extend Float to Double multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr, - OpndItins itins> { + X86FoldableSchedWrite sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info, - fpextend, itins>, + fpextend, sched>, avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info, - X86vfpextRnd, itins>, EVEX_V512; + X86vfpextRnd, sched>, EVEX_V512; } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info, - X86vfpext, itins, "{1to2}", "", f64mem>, EVEX_V128; + X86vfpext, sched, "{1to2}", "", f64mem>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend, - itins>, EVEX_V256; + sched>, EVEX_V256; } } // Truncate Double to Float -multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, OpndItins itins> { +multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched> { let Predicates = [HasAVX512] in { - defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, itins>, + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround, sched>, avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info, - X86vfproundRnd, itins>, EVEX_V512; + X86vfproundRnd, sched>, EVEX_V512; } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info, - X86vfpround, itins, "{1to2}", "{x}">, EVEX_V128; + X86vfpround, sched, "{1to2}", "{x}">, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround, - itins, "{1to4}", "{y}">, EVEX_V256; + sched, "{1to4}", "{y}">, EVEX_V256; def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; @@ -7088,9 +7075,9 @@ multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr, OpndItins itins> { } } -defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", SSE_CVT_PD2PS>, +defm VCVTPD2PS : avx512_cvtpd2ps<0x5A, "vcvtpd2ps", WriteCvtF2F>, VEX_W, PD, EVEX_CD8<64, CD8VF>; -defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", SSE_CVT_PS2PD>, +defm VCVTPS2PD : avx512_cvtps2pd<0x5A, "vcvtps2pd", WriteCvtF2F>, PS, EVEX_CD8<32, CD8VH>; def : Pat<(v8f64 (extloadv8f32 addr:$src)), @@ -7113,80 +7100,80 @@ let Predicates = [HasVLX] in { // Convert Signed/Unsigned Doubleword to Double multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNode128, OpndItins itins> { + SDNode OpNode128, X86FoldableSchedWrite sched> { // No rounding in this op let Predicates = [HasAVX512] in defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i32x_info, OpNode, - itins>, EVEX_V512; + sched>, EVEX_V512; let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info, - OpNode128, itins, "{1to2}", "", i64mem>, EVEX_V128; + OpNode128, sched, "{1to2}", "", i64mem>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode, - itins>, EVEX_V256; + sched>, EVEX_V256; } } // Convert Signed/Unsigned Doubleword to Float multiclass avx512_cvtdq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, OpndItins itins> { + SDNode OpNodeRnd, X86FoldableSchedWrite sched> { let Predicates = [HasAVX512] in defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16f32_info, v16i32_info, OpNode, - itins>, + sched>, avx512_vcvt_fp_rc<opc, OpcodeStr, v16f32_info, v16i32_info, - OpNodeRnd, itins>, EVEX_V512; + OpNodeRnd, sched>, EVEX_V512; let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i32x_info, OpNode, - itins>, EVEX_V128; + sched>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i32x_info, OpNode, - itins>, EVEX_V256; + sched>, EVEX_V256; } } // Convert Float to Signed/Unsigned Doubleword with truncation multiclass avx512_cvttps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, OpndItins itins> { + SDNode OpNodeRnd, X86FoldableSchedWrite sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, - itins>, + sched>, avx512_vcvt_fp_sae<opc, OpcodeStr, v16i32_info, v16f32_info, - OpNodeRnd, itins>, EVEX_V512; + OpNodeRnd, sched>, EVEX_V512; } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, - itins>, EVEX_V128; + sched>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, - itins>, EVEX_V256; + sched>, EVEX_V256; } } // Convert Float to Signed/Unsigned Doubleword multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, OpndItins itins> { + SDNode OpNodeRnd, X86FoldableSchedWrite sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v16i32_info, v16f32_info, OpNode, - itins>, + sched>, avx512_vcvt_fp_rc<opc, OpcodeStr, v16i32_info, v16f32_info, - OpNodeRnd, itins>, EVEX_V512; + OpNodeRnd, sched>, EVEX_V512; } let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f32x_info, OpNode, - itins>, EVEX_V128; + sched>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f32x_info, OpNode, - itins>, EVEX_V256; + sched>, EVEX_V256; } } // Convert Double to Signed/Unsigned Doubleword with truncation multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode OpNode128, SDNode OpNodeRnd, - OpndItins itins> { + X86FoldableSchedWrite sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, - itins>, + sched>, avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info, - OpNodeRnd, itins>, EVEX_V512; + OpNodeRnd, sched>, EVEX_V512; } let Predicates = [HasVLX] in { // we need "x"/"y" suffixes in order to distinguish between 128 and 256 @@ -7194,9 +7181,9 @@ multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly // due to the same reason. defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, - OpNode128, itins, "{1to2}", "{x}">, EVEX_V128; + OpNode128, sched, "{1to2}", "{x}">, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, - itins, "{1to4}", "{y}">, EVEX_V256; + sched, "{1to4}", "{y}">, EVEX_V256; def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; @@ -7211,12 +7198,12 @@ multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, // Convert Double to Signed/Unsigned Doubleword multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, OpndItins itins> { + SDNode OpNodeRnd, X86FoldableSchedWrite sched> { let Predicates = [HasAVX512] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode, - itins>, + sched>, avx512_vcvt_fp_rc<opc, OpcodeStr, v8i32x_info, v8f64_info, - OpNodeRnd, itins>, EVEX_V512; + OpNodeRnd, sched>, EVEX_V512; } let Predicates = [HasVLX] in { // we need "x"/"y" suffixes in order to distinguish between 128 and 256 @@ -7224,9 +7211,9 @@ multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly // due to the same reason. defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode, - itins, "{1to2}", "{x}">, EVEX_V128; + sched, "{1to2}", "{x}">, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode, - itins, "{1to4}", "{y}">, EVEX_V256; + sched, "{1to4}", "{y}">, EVEX_V256; def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; @@ -7241,101 +7228,102 @@ multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode, // Convert Double to Signed/Unsigned Quardword multiclass avx512_cvtpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, OpndItins itins> { + SDNode OpNodeRnd, X86FoldableSchedWrite sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, - itins>, + sched>, avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f64_info, - OpNodeRnd,itins>, EVEX_V512; + OpNodeRnd, sched>, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, - itins>, EVEX_V128; + sched>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode, - itins>, EVEX_V256; + sched>, EVEX_V256; } } // Convert Double to Signed/Unsigned Quardword with truncation multiclass avx512_cvttpd2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, OpndItins itins> { + SDNode OpNodeRnd, X86FoldableSchedWrite sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f64_info, OpNode, - itins>, + sched>, avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f64_info, - OpNodeRnd, itins>, EVEX_V512; + OpNodeRnd, sched>, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v2f64x_info, OpNode, - itins>, EVEX_V128; + sched>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f64x_info, OpNode, - itins>, EVEX_V256; + sched>, EVEX_V256; } } // Convert Signed/Unsigned Quardword to Double multiclass avx512_cvtqq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, OpndItins itins> { + SDNode OpNodeRnd, X86FoldableSchedWrite sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8i64_info, OpNode, - itins>, + sched>, avx512_vcvt_fp_rc<opc, OpcodeStr, v8f64_info, v8i64_info, - OpNodeRnd, itins>, EVEX_V512; + OpNodeRnd, sched>, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v2i64x_info, OpNode, - itins>, EVEX_V128; + sched>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i64x_info, OpNode, - itins>, EVEX_V256; + sched>, EVEX_V256; } } // Convert Float to Signed/Unsigned Quardword multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNodeRnd, OpndItins itins> { + SDNode OpNodeRnd, X86FoldableSchedWrite sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, - itins>, + sched>, avx512_vcvt_fp_rc<opc, OpcodeStr, v8i64_info, v8f32x_info, - OpNodeRnd, itins>, EVEX_V512; + OpNodeRnd, sched>, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { // Explicitly specified broadcast string, since we take only 2 elements // from v4f32x_info source defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode, - itins, "{1to2}", "", f64mem>, EVEX_V128; + sched, "{1to2}", "", f64mem>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, - itins>, EVEX_V256; + sched>, EVEX_V256; } } // Convert Float to Signed/Unsigned Quardword with truncation multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> { + SDNode OpNode128, SDNode OpNodeRnd, + X86FoldableSchedWrite sched> { let Predicates = [HasDQI] in { - defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, - itins>, + defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode, sched>, avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info, - OpNodeRnd, itins>, EVEX_V512; + OpNodeRnd, sched>, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { // Explicitly specified broadcast string, since we take only 2 elements // from v4f32x_info source defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode128, - itins, "{1to2}", "", f64mem>, EVEX_V128; + sched, "{1to2}", "", f64mem>, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode, - itins>, EVEX_V256; + sched>, EVEX_V256; } } // Convert Signed/Unsigned Quardword to Float multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode, - SDNode OpNode128, SDNode OpNodeRnd, OpndItins itins> { + SDNode OpNode128, SDNode OpNodeRnd, + X86FoldableSchedWrite sched> { let Predicates = [HasDQI] in { defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode, - itins>, + sched>, avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info, - OpNodeRnd, itins>, EVEX_V512; + OpNodeRnd, sched>, EVEX_V512; } let Predicates = [HasDQI, HasVLX] in { // we need "x"/"y" suffixes in order to distinguish between 128 and 256 @@ -7343,9 +7331,9 @@ multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode, // dest type - 'v4i32x_info'. We also specify the broadcast string explicitly // due to the same reason. defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128, - itins, "{1to2}", "{x}">, EVEX_V128; + sched, "{1to2}", "{x}">, EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode, - itins, "{1to4}", "{y}">, EVEX_V256; + sched, "{1to4}", "{y}">, EVEX_V256; def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}", (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>; @@ -7359,98 +7347,98 @@ multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode, } defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP, - SSE_CVT_I2PD>, XS, EVEX_CD8<32, CD8VH>; + WriteCvtI2F>, XS, EVEX_CD8<32, CD8VH>; defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp, - X86VSintToFpRnd, SSE_CVT_I2PS>, + X86VSintToFpRnd, WriteCvtI2F>, PS, EVEX_CD8<32, CD8VF>; defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint, - X86cvttp2siRnd, SSE_CVT_PS2I>, + X86cvttp2siRnd, WriteCvtF2I>, XS, EVEX_CD8<32, CD8VF>; defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si, - X86cvttp2siRnd, SSE_CVT_PD2I>, + X86cvttp2siRnd, WriteCvtF2I>, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint, - X86cvttp2uiRnd, SSE_CVT_PS2I>, PS, + X86cvttp2uiRnd, WriteCvtF2I>, PS, EVEX_CD8<32, CD8VF>; defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint, - X86cvttp2ui, X86cvttp2uiRnd, SSE_CVT_PD2I>, + X86cvttp2ui, X86cvttp2uiRnd, WriteCvtF2I>, PS, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, - X86VUintToFP, SSE_CVT_I2PD>, XS, + X86VUintToFP, WriteCvtI2F>, XS, EVEX_CD8<32, CD8VH>; defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp, - X86VUintToFpRnd, SSE_CVT_I2PS>, XD, + X86VUintToFpRnd, WriteCvtI2F>, XD, EVEX_CD8<32, CD8VF>; defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, - X86cvtp2IntRnd, SSE_CVT_PS2I>, PD, + X86cvtp2IntRnd, WriteCvtF2I>, PD, EVEX_CD8<32, CD8VF>; defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, - X86cvtp2IntRnd, SSE_CVT_PD2I>, XD, + X86cvtp2IntRnd, WriteCvtF2I>, XD, VEX_W, EVEX_CD8<64, CD8VF>; defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, - X86cvtp2UIntRnd, SSE_CVT_PS2I>, + X86cvtp2UIntRnd, WriteCvtF2I>, PS, EVEX_CD8<32, CD8VF>; defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, - X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W, + X86cvtp2UIntRnd, WriteCvtF2I>, VEX_W, PS, EVEX_CD8<64, CD8VF>; defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, - X86cvtp2IntRnd, SSE_CVT_PD2I>, VEX_W, + X86cvtp2IntRnd, WriteCvtF2I>, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, - X86cvtp2IntRnd, SSE_CVT_PS2I>, PD, + X86cvtp2IntRnd, WriteCvtF2I>, PD, EVEX_CD8<32, CD8VH>; defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, - X86cvtp2UIntRnd, SSE_CVT_PD2I>, VEX_W, + X86cvtp2UIntRnd, WriteCvtF2I>, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, - X86cvtp2UIntRnd, SSE_CVT_PS2I>, PD, + X86cvtp2UIntRnd, WriteCvtF2I>, PD, EVEX_CD8<32, CD8VH>; defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint, - X86cvttp2siRnd, SSE_CVT_PD2I>, VEX_W, + X86cvttp2siRnd, WriteCvtF2I>, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si, - X86cvttp2siRnd, SSE_CVT_PS2I>, PD, + X86cvttp2siRnd, WriteCvtF2I>, PD, EVEX_CD8<32, CD8VH>; defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint, - X86cvttp2uiRnd, SSE_CVT_PD2I>, VEX_W, + X86cvttp2uiRnd, WriteCvtF2I>, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui, - X86cvttp2uiRnd, SSE_CVT_PS2I>, PD, + X86cvttp2uiRnd, WriteCvtF2I>, PD, EVEX_CD8<32, CD8VH>; defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp, - X86VSintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS, + X86VSintToFpRnd, WriteCvtI2F>, VEX_W, XS, EVEX_CD8<64, CD8VF>; defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp, - X86VUintToFpRnd, SSE_CVT_I2PD>, VEX_W, XS, + X86VUintToFpRnd, WriteCvtI2F>, VEX_W, XS, EVEX_CD8<64, CD8VF>; defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP, - X86VSintToFpRnd, SSE_CVT_I2PS>, VEX_W, PS, + X86VSintToFpRnd, WriteCvtI2F>, VEX_W, PS, EVEX_CD8<64, CD8VF>; defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP, - X86VUintToFpRnd, SSE_CVT_I2PS>, VEX_W, XD, + X86VUintToFpRnd, WriteCvtI2F>, VEX_W, XD, EVEX_CD8<64, CD8VF>; let Predicates = [HasAVX512, NoVLX] in { @@ -7609,41 +7597,41 @@ def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))), multiclass avx512_cvtph2ps<X86VectorVTInfo _dest, X86VectorVTInfo _src, X86MemOperand x86memop, PatFrag ld_frag, - OpndItins itins> { + X86FoldableSchedWrite sched> { defm rr : AVX512_maskable<0x13, MRMSrcReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src), "vcvtph2ps", "$src", "$src", (X86cvtph2ps (_src.VT _src.RC:$src))>, - T8PD, Sched<[itins.Sched]>; + T8PD, Sched<[sched]>; defm rm : AVX512_maskable<0x13, MRMSrcMem, _dest, (outs _dest.RC:$dst), (ins x86memop:$src), "vcvtph2ps", "$src", "$src", (X86cvtph2ps (_src.VT (bitconvert (ld_frag addr:$src))))>, - T8PD, Sched<[itins.Sched.Folded]>; + T8PD, Sched<[sched.Folded]>; } multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, - OpndItins itins> { + X86FoldableSchedWrite sched> { defm rrb : AVX512_maskable<0x13, MRMSrcReg, _dest, (outs _dest.RC:$dst), (ins _src.RC:$src), "vcvtph2ps", "{sae}, $src", "$src, {sae}", (X86cvtph2psRnd (_src.VT _src.RC:$src), (i32 FROUND_NO_EXC))>, - T8PD, EVEX_B, Sched<[itins.Sched]>; + T8PD, EVEX_B, Sched<[sched]>; } let Predicates = [HasAVX512] in defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64, - SSE_CVT_PH2PS>, - avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, SSE_CVT_PH2PS>, + WriteCvtF2F>, + avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtF2F>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; let Predicates = [HasVLX] in { defm VCVTPH2PSZ256 : avx512_cvtph2ps<v8f32x_info, v8i16x_info, f128mem, - loadv2i64, SSE_CVT_PH2PS>, EVEX, EVEX_V256, + loadv2i64, WriteCvtF2F>, EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; defm VCVTPH2PSZ128 : avx512_cvtph2ps<v4f32x_info, v8i16x_info, f64mem, - loadv2i64, SSE_CVT_PH2PS>, EVEX, EVEX_V128, + loadv2i64, WriteCvtF2F>, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; // Pattern match vcvtph2ps of a scalar i64 load. @@ -7657,47 +7645,46 @@ let Predicates = [HasVLX] in { } multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src, - X86MemOperand x86memop, OpndItins itins> { + X86MemOperand x86memop, X86FoldableSchedWrite sched> { defm rr : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst), (ins _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph", "$src2, $src1", "$src1, $src2", (X86cvtps2ph (_src.VT _src.RC:$src1), (i32 imm:$src2)), 0, 0>, - AVX512AIi8Base, Sched<[itins.Sched]>; + AVX512AIi8Base, Sched<[sched]>; let hasSideEffects = 0, mayStore = 1 in { def mr : AVX512AIi8<0x1D, MRMDestMem, (outs), (ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs), (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>, - EVEX_K, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_K, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, - OpndItins itins> { + X86FoldableSchedWrite sched> { let hasSideEffects = 0 in defm rrb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest, (outs _dest.RC:$dst), (ins _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>, - EVEX_B, AVX512AIi8Base, Sched<[itins.Sched]>; + EVEX_B, AVX512AIi8Base, Sched<[sched]>; } let Predicates = [HasAVX512] in { - defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem, - SSE_CVT_PS2PH>, + defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem, WriteCvtF2F>, avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, - SSE_CVT_PS2PH>, EVEX, EVEX_V512, + WriteCvtF2F>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; let Predicates = [HasVLX] in { defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem, - SSE_CVT_PS2PH>, EVEX, EVEX_V256, + WriteCvtF2F>, EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; defm VCVTPS2PHZ128 : avx512_cvtps2ph<v8i16x_info, v4f32x_info, f64mem, - SSE_CVT_PS2PH>, EVEX, EVEX_V128, + WriteCvtF2F>, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; } @@ -7737,21 +7724,21 @@ let Predicates = [HasVLX] in { // Unordered/Ordered scalar fp compare with Sea and set EFLAGS multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, - string OpcodeStr, OpndItins itins> { + string OpcodeStr, X86FoldableSchedWrite sched> { let hasSideEffects = 0 in def rrb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2), !strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"), []>, - EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[itins.Sched]>; + EVEX, EVEX_B, VEX_LIG, EVEX_V128, Sched<[sched]>; } let Defs = [EFLAGS], Predicates = [HasAVX512] in { - defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSE_COMIS>, + defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", WriteFAdd>, AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; - defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSE_COMIS>, + defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", WriteFAdd>, AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; - defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSE_COMIS>, + defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", WriteFAdd>, AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>; - defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSE_COMIS>, + defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", WriteFAdd>, AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>; } @@ -7789,140 +7776,140 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in { /// avx512_fp14_s rcp14ss, rcp14sd, rsqrt14ss, rsqrt14sd multiclass avx512_fp14_s<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))>, - EVEX_4V, Sched<[itins.Sched]>; + EVEX_4V, Sched<[sched]>; defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2)>, EVEX_4V, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } -defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SSE_RCPS, f32x_info>, +defm VRCP14SS : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, WriteFRcp, f32x_info>, EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SSE_RCPS, f64x_info>, +defm VRCP14SD : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, WriteFRcp, f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, SSE_RSQRTSS, f32x_info>, +defm VRSQRT14SS : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s, WriteFRsqrt, f32x_info>, EVEX_CD8<32, CD8VT1>, T8PD, NotMemoryFoldable; -defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, SSE_RSQRTSS, f64x_info>, +defm VRSQRT14SD : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s, WriteFRsqrt, f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>, T8PD, NotMemoryFoldable; /// avx512_fp14_p rcp14ps, rcp14pd, rsqrt14ps, rsqrt14pd multiclass avx512_fp14_p<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src), OpcodeStr, "$src", "$src", (_.FloatVT (OpNode _.RC:$src))>, EVEX, T8PD, - Sched<[itins.Sched]>; + Sched<[sched]>; defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.MemOp:$src), OpcodeStr, "$src", "$src", (OpNode (_.FloatVT (bitconvert (_.LdFrag addr:$src))))>, EVEX, T8PD, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src), OpcodeStr, "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, (OpNode (_.FloatVT (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, - EVEX, T8PD, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX, T8PD, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode, - SizeItins itins> { - defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, itins.s, + X86FoldableSchedWrite sched> { + defm PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), OpNode, sched, v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, itins.d, + defm PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), OpNode, sched, v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { defm PSZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), - OpNode, itins.s, v4f32x_info>, + OpNode, sched, v4f32x_info>, EVEX_V128, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ps"), - OpNode, itins.s, v8f32x_info>, + OpNode, sched, v8f32x_info>, EVEX_V256, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), - OpNode, itins.d, v2f64x_info>, + OpNode, sched, v2f64x_info>, EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "pd"), - OpNode, itins.d, v4f64x_info>, + OpNode, sched, v4f64x_info>, EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>; } } -defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, SSE_RSQRT_P>; -defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, SSE_RCP_P>; +defm VRSQRT14 : avx512_fp14_p_vl_all<0x4E, "vrsqrt14", X86rsqrt14, WriteFRsqrt>; +defm VRCP14 : avx512_fp14_p_vl_all<0x4C, "vrcp14", X86rcp14, WriteFRcp>; /// avx512_fp28_s rcp28ss, rcp28sd, rsqrt28ss, rsqrt28sd multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _, - SDNode OpNode, OpndItins itins> { + SDNode OpNode, X86FoldableSchedWrite sched> { let ExeDomain = _.ExeDomain in { defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 FROUND_CURRENT))>, - Sched<[itins.Sched]>; + Sched<[sched]>; defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "{sae}, $src2, $src1", "$src1, $src2, {sae}", (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 FROUND_NO_EXC))>, EVEX_B, - Sched<[itins.Sched]>; + Sched<[sched]>; defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2, (i32 FROUND_CURRENT))>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode, - SizeItins itins> { - defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, itins.s>, + X86FoldableSchedWrite sched> { + defm SS : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, sched>, EVEX_CD8<32, CD8VT1>; - defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, itins.d>, + defm SD : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, sched>, EVEX_CD8<64, CD8VT1>, VEX_W; } let Predicates = [HasERI] in { - defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, SSE_RCP_S>, + defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, WriteFRcp>, T8PD, EVEX_4V; - defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, SSE_RSQRT_S>, + defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, WriteFRsqrt>, T8PD, EVEX_4V; } -defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, SSE_ALU_ITINS_S>, +defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexpRnds, WriteFAdd>, T8PD, EVEX_4V; /// avx512_fp28_p rcp28ps, rcp28pd, rsqrt28ps, rsqrt28pd multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, - SDNode OpNode, OpndItins itins> { + SDNode OpNode, X86FoldableSchedWrite sched> { let ExeDomain = _.ExeDomain in { defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src), OpcodeStr, "$src", "$src", (OpNode (_.VT _.RC:$src), (i32 FROUND_CURRENT))>, - Sched<[itins.Sched]>; + Sched<[sched]>; defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.MemOp:$src), OpcodeStr, "$src", "$src", (OpNode (_.FloatVT (bitconvert (_.LdFrag addr:$src))), (i32 FROUND_CURRENT))>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src), OpcodeStr, @@ -7930,113 +7917,113 @@ multiclass avx512_fp28_p<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, (OpNode (_.FloatVT (X86VBroadcast (_.ScalarLdFrag addr:$src))), (i32 FROUND_CURRENT))>, EVEX_B, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_fp28_p_round<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, - SDNode OpNode, OpndItins itins> { + SDNode OpNode, X86FoldableSchedWrite sched> { let ExeDomain = _.ExeDomain in defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src), OpcodeStr, "{sae}, $src", "$src, {sae}", (OpNode (_.VT _.RC:$src), (i32 FROUND_NO_EXC))>, - EVEX_B, Sched<[itins.Sched]>; + EVEX_B, Sched<[sched]>; } multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode, - SizeItins itins> { - defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>, - avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, itins.s>, + X86FoldableSchedWrite sched> { + defm PS : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched>, + avx512_fp28_p_round<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched>, T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>; - defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>, - avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, itins.d>, + defm PD : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched>, + avx512_fp28_p_round<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched>, T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; } multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr, - SDNode OpNode, SizeItins itins> { + SDNode OpNode, X86FoldableSchedWrite sched> { // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { - defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, itins.s>, + defm PSZ128 : avx512_fp28_p<opc, OpcodeStr#"ps", v4f32x_info, OpNode, sched>, EVEX_V128, T8PD, EVEX_CD8<32, CD8VF>; - defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, itins.s>, + defm PSZ256 : avx512_fp28_p<opc, OpcodeStr#"ps", v8f32x_info, OpNode, sched>, EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>; - defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, itins.d>, + defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode, sched>, EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; - defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, itins.d>, + defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode, sched>, EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>; } } let Predicates = [HasERI] in { - defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, SSE_RSQRT_P>, EVEX; - defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, SSE_RCP_P>, EVEX; - defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, SSE_ALU_ITINS_P>, EVEX; + defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, WriteFRsqrt>, EVEX; + defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, WriteFRcp>, EVEX; + defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, WriteFAdd>, EVEX; } -defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, SSE_ALU_ITINS_P>, +defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexpRnd, WriteFAdd>, avx512_fp_unaryop_packed<0x42, "vgetexp", X86fgetexpRnd, - SSE_ALU_ITINS_P>, EVEX; + WriteFAdd>, EVEX; -multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, OpndItins itins, - X86VectorVTInfo _>{ +multiclass avx512_sqrt_packed_round<bits<8> opc, string OpcodeStr, + X86FoldableSchedWrite sched, X86VectorVTInfo _>{ let ExeDomain = _.ExeDomain in defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src, AVX512RC:$rc), OpcodeStr, "$rc, $src", "$src, $rc", (_.VT (X86fsqrtRnd _.RC:$src, (i32 imm:$rc)))>, - EVEX, EVEX_B, EVEX_RC, Sched<[itins.Sched]>; + EVEX, EVEX_B, EVEX_RC, Sched<[sched]>; } -multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, OpndItins itins, - X86VectorVTInfo _>{ +multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, + X86FoldableSchedWrite sched, X86VectorVTInfo _>{ let ExeDomain = _.ExeDomain in { defm r: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src), OpcodeStr, "$src", "$src", (_.FloatVT (fsqrt _.RC:$src))>, EVEX, - Sched<[itins.Sched]>; + Sched<[sched]>; defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.MemOp:$src), OpcodeStr, "$src", "$src", (fsqrt (_.FloatVT (bitconvert (_.LdFrag addr:$src))))>, EVEX, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; defm mb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src), OpcodeStr, "${src}"##_.BroadcastStr, "${src}"##_.BroadcastStr, (fsqrt (_.FloatVT (X86VBroadcast (_.ScalarLdFrag addr:$src))))>, - EVEX, EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } } multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr> { - defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), SSE_SQRTPS, v16f32_info>, + defm PSZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), WriteFSqrt, v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), SSE_SQRTPD, v8f64_info>, + defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), WriteFSqrt, v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; // Define only if AVX512VL feature is present. let Predicates = [HasVLX] in { defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), - SSE_SQRTPS, v4f32x_info>, + WriteFSqrt, v4f32x_info>, EVEX_V128, PS, EVEX_CD8<32, CD8VF>; defm PSZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"), - SSE_SQRTPS, v8f32x_info>, + WriteFSqrt, v8f32x_info>, EVEX_V256, PS, EVEX_CD8<32, CD8VF>; defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), - SSE_SQRTPD, v2f64x_info>, + WriteFSqrt, v2f64x_info>, EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>; defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"), - SSE_SQRTPD, v4f64x_info>, + WriteFSqrt, v4f64x_info>, EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>; } } multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr> { - defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), SSE_SQRTPS, + defm PSZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "ps"), WriteFSqrt, v16f32_info>, EVEX_V512, PS, EVEX_CD8<32, CD8VF>; - defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), SSE_SQRTPD, + defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"), WriteFSqrt, v8f64_info>, EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>; } -multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, OpndItins itins, +multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, string SUFF, Intrinsic Intr> { let ExeDomain = _.ExeDomain in { defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), @@ -8045,32 +8032,32 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, OpndItins itins, (X86fsqrtRnds (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 FROUND_CURRENT))>, - Sched<[itins.Sched]>; + Sched<[sched]>; defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", (X86fsqrtRnds (_.VT _.RC:$src1), _.ScalarIntMemCPat:$src2, (i32 FROUND_CURRENT))>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr, "$rc, $src2, $src1", "$src1, $src2, $rc", (X86fsqrtRnds (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 imm:$rc))>, - EVEX_B, EVEX_RC, Sched<[itins.Sched]>; + EVEX_B, EVEX_RC, Sched<[sched]>; let isCodeGenOnly = 1, hasSideEffects = 0, Predicates=[HasAVX512] in { def r : I<opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - Sched<[itins.Sched]>; + Sched<[sched]>; let mayLoad = 1 in def m : I<opc, MRMSrcMem, (outs _.FRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2), OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } @@ -8096,10 +8083,10 @@ multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, OpndItins itins, } multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr> { - defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", SSE_SQRTPS, f32x_info, "SS", + defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", WriteFSqrt, f32x_info, "SS", int_x86_sse_sqrt_ss>, EVEX_CD8<32, CD8VT1>, EVEX_4V, XS, NotMemoryFoldable; - defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", SSE_SQRTPD, f64x_info, "SD", + defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", WriteFSqrt, f64x_info, "SD", int_x86_sse2_sqrt_sd>, EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W, NotMemoryFoldable; @@ -8111,21 +8098,21 @@ defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt">, defm VSQRT : avx512_sqrt_scalar_all<0x51, "vsqrt">, VEX_LIG; multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, - OpndItins itins, X86VectorVTInfo _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm r_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86RndScales (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 imm:$src3)))>, - Sched<[itins.Sched]>; + Sched<[sched]>; defm rb_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), OpcodeStr, "$src3, {sae}, $src2, $src1", "$src1, $src2, {sae}, $src3", (_.VT (X86RndScalesRnd (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 imm:$src3), (i32 FROUND_NO_EXC)))>, EVEX_B, - Sched<[itins.Sched]>; + Sched<[sched]>; defm m_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.IntScalarMemOp:$src2, i32u8imm:$src3), @@ -8133,19 +8120,19 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (_.VT (X86RndScales _.RC:$src1, _.ScalarIntMemCPat:$src2, (i32 imm:$src3)))>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [HasAVX512] in { def r : I<opc, MRMSrcReg, (outs _.FRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, i32u8imm:$src3), OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, Sched<[itins.Sched]>; + []>, Sched<[sched]>; let mayLoad = 1 in def m : I<opc, MRMSrcMem, (outs _.FRC:$dst), (ins _.FRC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), OpcodeStr#"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", - []>, Sched<[itins.Sched.Folded, ReadAfterLd]>; + []>, Sched<[sched.Folded, ReadAfterLd]>; } } @@ -8186,10 +8173,10 @@ multiclass avx512_rndscale_scalar<bits<8> opc, string OpcodeStr, } } -defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", SSE_ALU_F32S, +defm VRNDSCALESS : avx512_rndscale_scalar<0x0A, "vrndscaless", WriteFAdd, f32x_info>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VT1>; -defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", SSE_ALU_F64S, +defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", WriteFAdd, f64x_info>, VEX_W, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VT1>; @@ -8197,36 +8184,26 @@ defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", SSE_ALU_F64S, // Integer truncate and extend operations //------------------------------------------------- -let Sched = WriteShuffle256 in -def AVX512_EXTEND : OpndItins< - NoItinerary, NoItinerary ->; - -let Sched = WriteShuffle256 in -def AVX512_TRUNCATE : OpndItins< - NoItinerary, NoItinerary ->; - multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo SrcInfo, + X86FoldableSchedWrite sched, X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo, X86MemOperand x86memop> { let ExeDomain = DestInfo.ExeDomain in defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst), (ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1", (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>, - EVEX, T8XS, Sched<[itins.Sched]>; + EVEX, T8XS, Sched<[sched]>; let mayStore = 1, mayLoad = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in { def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, SrcInfo.RC:$src), OpcodeStr # "\t{$src, $dst|$dst, $src}", []>, - EVEX, Sched<[itins.Sched.Folded]>; + EVEX, Sched<[sched.Folded]>; def mrk : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>, - EVEX, EVEX_K, Sched<[itins.Sched.Folded]>; + EVEX, EVEX_K, Sched<[sched.Folded]>; }//mayStore = 1, mayLoad = 1, hasSideEffects = 0 } @@ -8245,7 +8222,7 @@ multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo, } multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128, - SDNode OpNode256, SDNode OpNode512, OpndItins itins, + SDNode OpNode256, SDNode OpNode512, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, @@ -8254,117 +8231,117 @@ multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode128, PatFrag mtruncFrag, Predicate prd = HasAVX512>{ let Predicates = [HasVLX, prd] in { - defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, itins, + defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode128, sched, VTSrcInfo.info128, DestInfoZ128, x86memopZ128>, avx512_trunc_mr_lowering<VTSrcInfo.info128, DestInfoZ128, truncFrag, mtruncFrag>, EVEX_V128; - defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, itins, + defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode256, sched, VTSrcInfo.info256, DestInfoZ256, x86memopZ256>, avx512_trunc_mr_lowering<VTSrcInfo.info256, DestInfoZ256, truncFrag, mtruncFrag>, EVEX_V256; } let Predicates = [prd] in - defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, itins, + defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode512, sched, VTSrcInfo.info512, DestInfoZ, x86memopZ>, avx512_trunc_mr_lowering<VTSrcInfo.info512, DestInfoZ, truncFrag, mtruncFrag>, EVEX_V512; } multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, PatFrag StoreNode, + X86FoldableSchedWrite sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { - defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, itins, + defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, InVecNode, sched, avx512vl_i64_info, v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem, StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VO>; } multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, PatFrag StoreNode, + X86FoldableSchedWrite sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { - defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, itins, + defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched, avx512vl_i64_info, v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem, StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VQ>; } multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, PatFrag StoreNode, + X86FoldableSchedWrite sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { - defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, itins, + defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched, avx512vl_i64_info, v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem, StoreNode, MaskedStoreNode>, EVEX_CD8<32, CD8VH>; } multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, PatFrag StoreNode, + X86FoldableSchedWrite sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { - defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, itins, + defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, InVecNode, OpNode, sched, avx512vl_i32_info, v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem, StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VQ>; } multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, PatFrag StoreNode, + X86FoldableSchedWrite sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { - defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, itins, + defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, sched, avx512vl_i32_info, v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem, StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VH>; } multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, PatFrag StoreNode, + X86FoldableSchedWrite sched, PatFrag StoreNode, PatFrag MaskedStoreNode, SDNode InVecNode = OpNode> { defm NAME: avx512_trunc<opc, OpcodeStr, InVecNode, OpNode, OpNode, - itins, avx512vl_i16_info, v16i8x_info, v16i8x_info, + sched, avx512vl_i16_info, v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem, StoreNode, MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>; } -defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, AVX512_TRUNCATE, +defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", trunc, WriteShuffle256, truncstorevi8, masked_truncstorevi8, X86vtrunc>; -defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, AVX512_TRUNCATE, +defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, WriteShuffle256, truncstore_s_vi8, masked_truncstore_s_vi8>; -defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, AVX512_TRUNCATE, +defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, WriteShuffle256, truncstore_us_vi8, masked_truncstore_us_vi8>; -defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, AVX512_TRUNCATE, +defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", trunc, WriteShuffle256, truncstorevi16, masked_truncstorevi16, X86vtrunc>; -defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, AVX512_TRUNCATE, +defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, WriteShuffle256, truncstore_s_vi16, masked_truncstore_s_vi16>; -defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, AVX512_TRUNCATE, +defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, WriteShuffle256, truncstore_us_vi16, masked_truncstore_us_vi16>; -defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, AVX512_TRUNCATE, +defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", trunc, WriteShuffle256, truncstorevi32, masked_truncstorevi32, X86vtrunc>; -defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, AVX512_TRUNCATE, +defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, WriteShuffle256, truncstore_s_vi32, masked_truncstore_s_vi32>; -defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, AVX512_TRUNCATE, +defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, WriteShuffle256, truncstore_us_vi32, masked_truncstore_us_vi32>; -defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, AVX512_TRUNCATE, +defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", trunc, WriteShuffle256, truncstorevi8, masked_truncstorevi8, X86vtrunc>; -defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, AVX512_TRUNCATE, +defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, WriteShuffle256, truncstore_s_vi8, masked_truncstore_s_vi8>; -defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, AVX512_TRUNCATE, +defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, WriteShuffle256, truncstore_us_vi8, masked_truncstore_us_vi8>; -defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, AVX512_TRUNCATE, +defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", trunc, WriteShuffle256, truncstorevi16, masked_truncstorevi16, X86vtrunc>; -defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, AVX512_TRUNCATE, +defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, WriteShuffle256, truncstore_s_vi16, masked_truncstore_s_vi16>; -defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, AVX512_TRUNCATE, +defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, WriteShuffle256, truncstore_us_vi16, masked_truncstore_us_vi16>; -defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, AVX512_TRUNCATE, +defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", trunc, WriteShuffle256, truncstorevi8, masked_truncstorevi8, X86vtrunc>; -defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, AVX512_TRUNCATE, +defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, WriteShuffle256, truncstore_s_vi8, masked_truncstore_s_vi8>; -defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, AVX512_TRUNCATE, +defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, WriteShuffle256, truncstore_us_vi8, masked_truncstore_us_vi8>; let Predicates = [HasAVX512, NoVLX] in { @@ -8384,150 +8361,150 @@ def: Pat<(v16i8 (trunc (v16i16 VR256X:$src))), VR256X:$src, sub_ymm))), sub_xmm))>; } -multiclass avx512_extend_common<bits<8> opc, string OpcodeStr, OpndItins itins, +multiclass WriteShuffle256_common<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{ let ExeDomain = DestInfo.ExeDomain in { defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst), (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src", (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>, - EVEX, Sched<[itins.Sched]>; + EVEX, Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst), (ins x86memop:$src), OpcodeStr ,"$src", "$src", (DestInfo.VT (LdFrag addr:$src))>, - EVEX, Sched<[itins.Sched.Folded]>; + EVEX, Sched<[sched.Folded]>; } } -multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr, +multiclass WriteShuffle256_BW<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode InVecNode, string ExtTy, - OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { + X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { let Predicates = [HasVLX, HasBWI] in { - defm Z128: avx512_extend_common<opc, OpcodeStr, itins, v8i16x_info, + defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v8i16x_info, v16i8x_info, i64mem, LdFrag, InVecNode>, EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG; - defm Z256: avx512_extend_common<opc, OpcodeStr, itins, v16i16x_info, + defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v16i16x_info, v16i8x_info, i128mem, LdFrag, OpNode>, EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasBWI] in { - defm Z : avx512_extend_common<opc, OpcodeStr, itins, v32i16_info, + defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v32i16_info, v32i8x_info, i256mem, LdFrag, OpNode>, EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG; } } -multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr, +multiclass WriteShuffle256_BD<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode InVecNode, string ExtTy, - OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { + X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { let Predicates = [HasVLX, HasAVX512] in { - defm Z128: avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info, + defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info, v16i8x_info, i32mem, LdFrag, InVecNode>, EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; - defm Z256: avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info, + defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info, v16i8x_info, i64mem, LdFrag, OpNode>, EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { - defm Z : avx512_extend_common<opc, OpcodeStr, itins, v16i32_info, + defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info, v16i8x_info, i128mem, LdFrag, OpNode>, EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; } } -multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr, +multiclass WriteShuffle256_BQ<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode InVecNode, string ExtTy, - OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { + X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> { let Predicates = [HasVLX, HasAVX512] in { - defm Z128: avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info, + defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info, v16i8x_info, i16mem, LdFrag, InVecNode>, EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG; - defm Z256: avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info, + defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info, v16i8x_info, i32mem, LdFrag, OpNode>, EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { - defm Z : avx512_extend_common<opc, OpcodeStr, itins, v8i64_info, + defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info, v16i8x_info, i64mem, LdFrag, OpNode>, EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG; } } -multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr, +multiclass WriteShuffle256_WD<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode InVecNode, string ExtTy, - OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> { + X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> { let Predicates = [HasVLX, HasAVX512] in { - defm Z128: avx512_extend_common<opc, OpcodeStr, itins, v4i32x_info, + defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v4i32x_info, v8i16x_info, i64mem, LdFrag, InVecNode>, EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG; - defm Z256: avx512_extend_common<opc, OpcodeStr, itins, v8i32x_info, + defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v8i32x_info, v8i16x_info, i128mem, LdFrag, OpNode>, EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { - defm Z : avx512_extend_common<opc, OpcodeStr, itins, v16i32_info, + defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v16i32_info, v16i16x_info, i256mem, LdFrag, OpNode>, EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG; } } -multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr, +multiclass WriteShuffle256_WQ<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode InVecNode, string ExtTy, - OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> { + X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> { let Predicates = [HasVLX, HasAVX512] in { - defm Z128: avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info, + defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info, v8i16x_info, i32mem, LdFrag, InVecNode>, EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; - defm Z256: avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info, + defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info, v8i16x_info, i64mem, LdFrag, OpNode>, EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { - defm Z : avx512_extend_common<opc, OpcodeStr, itins, v8i64_info, + defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info, v8i16x_info, i128mem, LdFrag, OpNode>, EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; } } -multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr, +multiclass WriteShuffle256_DQ<bits<8> opc, string OpcodeStr, SDNode OpNode, SDNode InVecNode, string ExtTy, - OpndItins itins, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> { + X86FoldableSchedWrite sched, PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> { let Predicates = [HasVLX, HasAVX512] in { - defm Z128: avx512_extend_common<opc, OpcodeStr, itins, v2i64x_info, + defm Z128: WriteShuffle256_common<opc, OpcodeStr, sched, v2i64x_info, v4i32x_info, i64mem, LdFrag, InVecNode>, EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128; - defm Z256: avx512_extend_common<opc, OpcodeStr, itins, v4i64x_info, + defm Z256: WriteShuffle256_common<opc, OpcodeStr, sched, v4i64x_info, v4i32x_info, i128mem, LdFrag, OpNode>, EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256; } let Predicates = [HasAVX512] in { - defm Z : avx512_extend_common<opc, OpcodeStr, itins, v8i64_info, + defm Z : WriteShuffle256_common<opc, OpcodeStr, sched, v8i64_info, v8i32x_info, i256mem, LdFrag, OpNode>, EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512; } } -defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", AVX512_EXTEND>; -defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", AVX512_EXTEND>; -defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", AVX512_EXTEND>; -defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", AVX512_EXTEND>; -defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", AVX512_EXTEND>; -defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", AVX512_EXTEND>; +defm VPMOVZXBW : WriteShuffle256_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", WriteShuffle256>; +defm VPMOVZXBD : WriteShuffle256_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", WriteShuffle256>; +defm VPMOVZXBQ : WriteShuffle256_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", WriteShuffle256>; +defm VPMOVZXWD : WriteShuffle256_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", WriteShuffle256>; +defm VPMOVZXWQ : WriteShuffle256_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", WriteShuffle256>; +defm VPMOVZXDQ : WriteShuffle256_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", WriteShuffle256>; -defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", AVX512_EXTEND>; -defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", AVX512_EXTEND>; -defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", AVX512_EXTEND>; -defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", AVX512_EXTEND>; -defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", AVX512_EXTEND>; -defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", AVX512_EXTEND>; +defm VPMOVSXBW: WriteShuffle256_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", WriteShuffle256>; +defm VPMOVSXBD: WriteShuffle256_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", WriteShuffle256>; +defm VPMOVSXBQ: WriteShuffle256_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", WriteShuffle256>; +defm VPMOVSXWD: WriteShuffle256_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", WriteShuffle256>; +defm VPMOVSXWQ: WriteShuffle256_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", WriteShuffle256>; +defm VPMOVSXDQ: WriteShuffle256_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", WriteShuffle256>; multiclass AVX512_pmovx_patterns<string OpcPrefix, SDNode ExtOp, @@ -8936,36 +8913,26 @@ let Predicates = [HasDQI, NoBWI] in { // AVX-512 - COMPRESS and EXPAND // -// FIXME: Is there a better scheduler itinerary for VPCOMPRESS/VPEXPAND? -let Sched = WriteVarShuffle256 in { -def AVX512_COMPRESS : OpndItins< - NoItinerary, NoItinerary ->; -def AVX512_EXPAND : OpndItins< - NoItinerary, NoItinerary ->; -} - multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _, - string OpcodeStr, OpndItins itins> { + string OpcodeStr, X86FoldableSchedWrite sched> { defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst), (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", (_.VT (X86compress _.RC:$src1))>, AVX5128IBase, - Sched<[itins.Sched]>; + Sched<[sched]>; let mayStore = 1, hasSideEffects = 0 in def mr : AVX5128I<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.RC:$src), OpcodeStr # "\t{$src, $dst|$dst, $src}", []>, EVEX_CD8<_.EltSize, CD8VT1>, - Sched<[itins.Sched.Folded]>; + Sched<[sched.Folded]>; def mrk : AVX5128I<opc, MRMDestMem, (outs), (ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src), OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>, - Sched<[itins.Sched.Folded]>; + Sched<[sched.Folded]>; } multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > { @@ -8976,44 +8943,45 @@ multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > { } multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr, - OpndItins itins, + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, Predicate Pred = HasAVX512> { let Predicates = [Pred] in - defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, itins>, + defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr, sched>, compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512; let Predicates = [Pred, HasVLX] in { - defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, itins>, + defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr, sched>, compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256; - defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, itins>, + defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr, sched>, compress_by_vec_width_lowering<VTInfo.info128>, EVEX_V128; } } -defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", AVX512_COMPRESS, +// FIXME: Is there a better scheduler class for VPCOMPRESS? +defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256, avx512vl_i32_info>, EVEX; -defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", AVX512_COMPRESS, +defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256, avx512vl_i64_info>, EVEX, VEX_W; -defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", AVX512_COMPRESS, +defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256, avx512vl_f32_info>, EVEX; -defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", AVX512_COMPRESS, +defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256, avx512vl_f64_info>, EVEX, VEX_W; // expand multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _, - string OpcodeStr, OpndItins itins> { + string OpcodeStr, X86FoldableSchedWrite sched> { defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", (_.VT (X86expand _.RC:$src1))>, AVX5128IBase, - Sched<[itins.Sched]>; + Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1", (_.VT (X86expand (_.VT (bitconvert (_.LdFrag addr:$src1)))))>, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } multiclass expand_by_vec_width_lowering<X86VectorVTInfo _ > { @@ -9029,28 +8997,29 @@ multiclass expand_by_vec_width_lowering<X86VectorVTInfo _ > { } multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr, - OpndItins itins, + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, Predicate Pred = HasAVX512> { let Predicates = [Pred] in - defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, itins>, + defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr, sched>, expand_by_vec_width_lowering<VTInfo.info512>, EVEX_V512; let Predicates = [Pred, HasVLX] in { - defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, itins>, + defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr, sched>, expand_by_vec_width_lowering<VTInfo.info256>, EVEX_V256; - defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, itins>, + defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr, sched>, expand_by_vec_width_lowering<VTInfo.info128>, EVEX_V128; } } -defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", AVX512_EXPAND, +// FIXME: Is there a better scheduler class for VPEXPAND? +defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256, avx512vl_i32_info>, EVEX; -defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", AVX512_EXPAND, +defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256, avx512vl_i64_info>, EVEX, VEX_W; -defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", AVX512_EXPAND, +defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256, avx512vl_f32_info>, EVEX; -defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", AVX512_EXPAND, +defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256, avx512vl_f64_info>, EVEX, VEX_W; //handle instruction reg_vec1 = op(reg_vec,imm) @@ -9058,32 +9027,32 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", AVX512_EXPAND, // op(broadcast(eltVt),imm) //all instruction created with FROUND_CURRENT multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode (_.VT _.RC:$src1), - (i32 imm:$src2))>, Sched<[itins.Sched]>; + (i32 imm:$src2))>, Sched<[sched]>; defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.MemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2", (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i32 imm:$src2))>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src1, i32u8imm:$src2), OpcodeStr##_.Suffix, "$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2", (OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))), (i32 imm:$src2))>, EVEX_B, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, - SDNode OpNode, OpndItins itins, + SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), @@ -9093,22 +9062,22 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, (OpNode (_.VT _.RC:$src1), (i32 imm:$src2), (i32 FROUND_NO_EXC))>, - EVEX_B, Sched<[itins.Sched]>; + EVEX_B, Sched<[sched]>; } multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr, AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, - SDNode OpNodeRnd, OpndItins itins, Predicate prd>{ + SDNode OpNodeRnd, X86FoldableSchedWrite sched, Predicate prd>{ let Predicates = [prd] in { - defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins, + defm Z : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched, _.info512>, avx512_unary_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, - itins, _.info512>, EVEX_V512; + sched, _.info512>, EVEX_V512; } let Predicates = [prd, HasVLX] in { - defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins, + defm Z128 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched, _.info128>, EVEX_V128; - defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, itins, + defm Z256 : avx512_unary_fp_packed_imm<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; } } @@ -9118,7 +9087,7 @@ multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr, // op(reg_vec2,broadcast(eltVt),imm) //all instruction created with FROUND_CURRENT multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _>{ + X86FoldableSchedWrite sched, X86VectorVTInfo _>{ let ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), @@ -9126,14 +9095,14 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 imm:$src3))>, - Sched<[itins.Sched]>; + Sched<[sched]>; defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", (OpNode (_.VT _.RC:$src1), (_.VT (bitconvert (_.LdFrag addr:$src2))), (i32 imm:$src3))>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", @@ -9141,14 +9110,14 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), (i32 imm:$src3))>, EVEX_B, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_vec,imm) multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo DestInfo, + X86FoldableSchedWrite sched, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{ let ExeDomain = DestInfo.ExeDomain in { defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst), @@ -9157,7 +9126,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1), (SrcInfo.VT SrcInfo.RC:$src2), (i8 imm:$src3)))>, - Sched<[itins.Sched]>; + Sched<[sched]>; defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst), (ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", @@ -9165,7 +9134,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, (SrcInfo.VT (bitconvert (SrcInfo.LdFrag addr:$src2))), (i8 imm:$src3)))>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } @@ -9173,8 +9142,8 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, // op(reg_vec2,mem_vec,imm) // op(reg_vec2,broadcast(eltVt),imm) multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _>: - avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, itins, _, _>{ + X86FoldableSchedWrite sched, X86VectorVTInfo _>: + avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, sched, _, _>{ let ExeDomain = _.ExeDomain in defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), @@ -9184,13 +9153,13 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))), (i8 imm:$src3))>, EVEX_B, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm) // op(reg_vec2,mem_scalar,imm) multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), @@ -9198,7 +9167,7 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2), (i32 imm:$src3))>, - Sched<[itins.Sched]>; + Sched<[sched]>; defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", @@ -9206,13 +9175,13 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))), (i32 imm:$src3))>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } //handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, - SDNode OpNode, OpndItins itins, + SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), @@ -9223,12 +9192,12 @@ multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr, (_.VT _.RC:$src2), (i32 imm:$src3), (i32 FROUND_NO_EXC))>, - EVEX_B, Sched<[itins.Sched]>; + EVEX_B, Sched<[sched]>; } //handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae} multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3), @@ -9238,111 +9207,111 @@ multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode (_.VT _.RC:$src2), (i32 imm:$src3), (i32 FROUND_NO_EXC))>, - EVEX_B, Sched<[itins.Sched]>; + EVEX_B, Sched<[sched]>; } multiclass avx512_common_fp_sae_packed_imm<string OpcodeStr, AVX512VLVectorVTInfo _, bits<8> opc, SDNode OpNode, - SDNode OpNodeRnd, OpndItins itins, Predicate prd>{ + SDNode OpNodeRnd, X86FoldableSchedWrite sched, Predicate prd>{ let Predicates = [prd] in { - defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info512>, - avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, itins, _.info512>, + defm Z : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched, _.info512>, + avx512_fp_sae_packed_imm<opc, OpcodeStr, OpNodeRnd, sched, _.info512>, EVEX_V512; } let Predicates = [prd, HasVLX] in { - defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info128>, + defm Z128 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched, _.info128>, EVEX_V128; - defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, itins, _.info256>, + defm Z256 : avx512_fp_packed_imm<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; } } multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr, - OpndItins itins, AVX512VLVectorVTInfo DestInfo, + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> { let Predicates = [Pred] in { - defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info512, + defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched, DestInfo.info512, SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V; } let Predicates = [Pred, HasVLX] in { - defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info128, + defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched, DestInfo.info128, SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V; - defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info256, + defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, sched, DestInfo.info256, SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V; } } multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _, - bits<8> opc, SDNode OpNode, OpndItins itins, + bits<8> opc, SDNode OpNode, X86FoldableSchedWrite sched, Predicate Pred = HasAVX512> { let Predicates = [Pred] in { - defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512; + defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched, _.info512>, EVEX_V512; } let Predicates = [Pred, HasVLX] in { - defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128; - defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256; + defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched, _.info128>, EVEX_V128; + defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, sched, _.info256>, EVEX_V256; } } multiclass avx512_common_fp_sae_scalar_imm<string OpcodeStr, X86VectorVTInfo _, bits<8> opc, SDNode OpNode, - SDNode OpNodeRnd, OpndItins itins, Predicate prd>{ + SDNode OpNodeRnd, X86FoldableSchedWrite sched, Predicate prd>{ let Predicates = [prd] in { - defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, itins, _>, - avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, itins, _>; + defm Z128 : avx512_fp_scalar_imm<opc, OpcodeStr, OpNode, sched, _>, + avx512_fp_sae_scalar_imm<opc, OpcodeStr, OpNodeRnd, sched, _>; } } multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr, bits<8> opcPs, bits<8> opcPd, SDNode OpNode, - SDNode OpNodeRnd, SizeItins itins, Predicate prd>{ + SDNode OpNodeRnd, X86FoldableSchedWrite sched, Predicate prd>{ defm PS : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f32_info, - opcPs, OpNode, OpNodeRnd, itins.s, prd>, + opcPs, OpNode, OpNodeRnd, sched, prd>, EVEX_CD8<32, CD8VF>; defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info, - opcPd, OpNode, OpNodeRnd, itins.d, prd>, + opcPd, OpNode, OpNodeRnd, sched, prd>, EVEX_CD8<64, CD8VF>, VEX_W; } defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56, - X86VReduce, X86VReduceRnd, SSE_ALU_ITINS_P, HasDQI>, + X86VReduce, X86VReduceRnd, WriteFAdd, HasDQI>, AVX512AIi8Base, EVEX; defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, - X86VRndScale, X86VRndScaleRnd, SSE_ALU_ITINS_P, HasAVX512>, + X86VRndScale, X86VRndScaleRnd, WriteFAdd, HasAVX512>, AVX512AIi8Base, EVEX; defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26, - X86VGetMant, X86VGetMantRnd, SSE_ALU_ITINS_P, HasAVX512>, + X86VGetMant, X86VGetMantRnd, WriteFAdd, HasAVX512>, AVX512AIi8Base, EVEX; defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info, 0x50, X86VRange, X86VRangeRnd, - SSE_ALU_F64P, HasDQI>, + WriteFAdd, HasDQI>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info, 0x50, X86VRange, X86VRangeRnd, - SSE_ALU_F32P, HasDQI>, + WriteFAdd, HasDQI>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd", - f64x_info, 0x51, X86Ranges, X86RangesRnd, SSE_ALU_F64S, HasDQI>, + f64x_info, 0x51, X86Ranges, X86RangesRnd, WriteFAdd, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info, - 0x51, X86Ranges, X86RangesRnd, SSE_ALU_F32S, HasDQI>, + 0x51, X86Ranges, X86RangesRnd, WriteFAdd, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info, - 0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F64S, HasDQI>, + 0x57, X86Reduces, X86ReducesRnd, WriteFAdd, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info, - 0x57, X86Reduces, X86ReducesRnd, SSE_ALU_F32S, HasDQI>, + 0x57, X86Reduces, X86ReducesRnd, WriteFAdd, HasDQI>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info, - 0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F64S, HasAVX512>, + 0x27, X86GetMants, X86GetMantsRnd, WriteFAdd, HasAVX512>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info, - 0x27, X86GetMants, X86GetMantsRnd, SSE_ALU_F32S, HasAVX512>, + 0x27, X86GetMants, X86GetMantsRnd, WriteFAdd, HasAVX512>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; let Predicates = [HasAVX512] in { @@ -9416,7 +9385,7 @@ def : Pat<(v4f64 (ftrunc VR256X:$src)), } multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, - OpndItins itins, X86VectorVTInfo _, + X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo CastInfo> { let ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), @@ -9425,7 +9394,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, (_.VT (bitconvert (CastInfo.VT (X86Shuf128 _.RC:$src1, _.RC:$src2, (i8 imm:$src3)))))>, - Sched<[itins.Sched]>; + Sched<[sched]>; defm rmi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.MemOp:$src2, u8imm:$src3), OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3", @@ -9434,7 +9403,7 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, (CastInfo.VT (X86Shuf128 _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)), (i8 imm:$src3)))))>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3), OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1", @@ -9445,29 +9414,29 @@ multiclass avx512_shuff_packed_128_common<bits<8> opc, string OpcodeStr, (X86Shuf128 _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2)), (i8 imm:$src3)))))>, EVEX_B, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } -multiclass avx512_shuff_packed_128<string OpcodeStr, OpndItins itins, +multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo CastInfo, bits<8> opc>{ let Predicates = [HasAVX512] in - defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, itins, + defm Z : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, _.info512, CastInfo.info512>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in - defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, itins, + defm Z256 : avx512_shuff_packed_128_common<opc, OpcodeStr, sched, _.info256, CastInfo.info256>, EVEX_V256; } -defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", SSE_SHUFP, +defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256, avx512vl_f32_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; -defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", SSE_SHUFP, +defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256, avx512vl_f64_info, avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; -defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", SSE_SHUFP, +defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256, avx512vl_i32_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; -defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", SSE_SHUFP, +defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256, avx512vl_i64_info, avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; let Predicates = [HasAVX512] in { @@ -9503,20 +9472,20 @@ def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))), 0)>; } -multiclass avx512_valign<string OpcodeStr, OpndItins itins, +multiclass avx512_valign<string OpcodeStr, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo_I> { - defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign, itins>, + defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign, sched>, AVX512AIi8Base, EVEX_4V; } -defm VALIGND: avx512_valign<"valignd", SSE_PALIGN, avx512vl_i32_info>, - EVEX_CD8<32, CD8VF>; -defm VALIGNQ: avx512_valign<"valignq", SSE_PALIGN, avx512vl_i64_info>, - EVEX_CD8<64, CD8VF>, VEX_W; +defm VALIGND: avx512_valign<"valignd", WriteShuffle, avx512vl_i32_info>, + EVEX_CD8<32, CD8VF>; +defm VALIGNQ: avx512_valign<"valignq", WriteShuffle, avx512vl_i64_info>, + EVEX_CD8<64, CD8VF>, VEX_W; -defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", SSE_PALIGN, - avx512vl_i8_info, avx512vl_i8_info>, - EVEX_CD8<8, CD8VF>; +defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", + WriteShuffle, avx512vl_i8_info, + avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; // Fragments to help convert valignq into masked valignd. Or valignq/valignd // into vpalignr. @@ -9636,30 +9605,30 @@ let Predicates = [HasVLX, HasBWI] in { } defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw", - SSE_INTMUL_ITINS_P, avx512vl_i16_info, avx512vl_i8_info>, + WriteVecIMul, avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>; multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src1), OpcodeStr, "$src1", "$src1", (_.VT (OpNode _.RC:$src1))>, EVEX, AVX5128IBase, - Sched<[itins.Sched]>; + Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.MemOp:$src1), OpcodeStr, "$src1", "$src1", (_.VT (OpNode (bitconvert (_.LdFrag addr:$src1))))>, EVEX, AVX5128IBase, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[itins.Sched.Folded]>; + Sched<[sched.Folded]>; } } multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> : - avx512_unary_rm<opc, OpcodeStr, OpNode, itins, _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> : + avx512_unary_rm<opc, OpcodeStr, OpNode, sched, _> { defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src1), OpcodeStr, "${src1}"##_.BroadcastStr, @@ -9667,66 +9636,66 @@ multiclass avx512_unary_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src1))))>, EVEX, AVX5128IBase, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[itins.Sched.Folded]>; + Sched<[sched.Folded]>; } multiclass avx512_unary_rm_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, AVX512VLVectorVTInfo VTInfo, + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, Predicate prd> { let Predicates = [prd] in - defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info512>, + defm Z : avx512_unary_rm<opc, OpcodeStr, OpNode, sched, VTInfo.info512>, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info256>, + defm Z256 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched, VTInfo.info256>, EVEX_V256; - defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, itins, VTInfo.info128>, + defm Z128 : avx512_unary_rm<opc, OpcodeStr, OpNode, sched, VTInfo.info128>, EVEX_V128; } } multiclass avx512_unary_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, AVX512VLVectorVTInfo VTInfo, + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo, Predicate prd> { let Predicates = [prd] in - defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info512>, + defm Z : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched, VTInfo.info512>, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info256>, + defm Z256 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched, VTInfo.info256>, EVEX_V256; - defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, itins, VTInfo.info128>, + defm Z128 : avx512_unary_rmb<opc, OpcodeStr, OpNode, sched, VTInfo.info128>, EVEX_V128; } } multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr, - SDNode OpNode, OpndItins itins, Predicate prd> { - defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, itins, + SDNode OpNode, X86FoldableSchedWrite sched, Predicate prd> { + defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched, avx512vl_i64_info, prd>, VEX_W; - defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, itins, + defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched, avx512vl_i32_info, prd>; } multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr, - SDNode OpNode, OpndItins itins, Predicate prd> { - defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, itins, + SDNode OpNode, X86FoldableSchedWrite sched, Predicate prd> { + defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched, avx512vl_i16_info, prd>, VEX_WIG; - defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, itins, + defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched, avx512vl_i8_info, prd>, VEX_WIG; } multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w, bits<8> opc_d, bits<8> opc_q, string OpcodeStr, SDNode OpNode, - OpndItins itins> { - defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, itins, + X86FoldableSchedWrite sched> { + defm NAME : avx512_unary_rm_vl_dq<opc_d, opc_q, OpcodeStr, OpNode, sched, HasAVX512>, - avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, itins, + avx512_unary_rm_vl_bw<opc_b, opc_w, OpcodeStr, OpNode, sched, HasBWI>; } -defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, SSE_PABS>; +defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", abs, WriteVecALU>; // VPABS: Use 512bit version to implement 128/256 bit in case NoVLX. let Predicates = [HasAVX512, NoVLX] in { @@ -9764,13 +9733,13 @@ multiclass avx512_unary_lowering<string InstrStr, SDNode OpNode, } } -// FIXME: Is there a better scheduler itinerary for VPLZCNT? +// FIXME: Is there a better scheduler class for VPLZCNT? defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz, - SSE_INTALU_ITINS_P, HasCDI>; + WriteVecALU, HasCDI>; -// FIXME: Is there a better scheduler itinerary for VPCONFLICT? +// FIXME: Is there a better scheduler class for VPCONFLICT? defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, - SSE_INTALU_ITINS_P, HasCDI>; + WriteVecALU, HasCDI>; // VPLZCNT: Use 512bit version to implement 128/256 bit in case NoVLX. defm : avx512_unary_lowering<"VPLZCNTQ", ctlz, avx512vl_i64_info, HasCDI>; @@ -9780,9 +9749,9 @@ defm : avx512_unary_lowering<"VPLZCNTD", ctlz, avx512vl_i32_info, HasCDI>; // Counts number of ones - VPOPCNTD and VPOPCNTQ //===---------------------------------------------------------------------===// -// FIXME: Is there a better scheduler itinerary for VPOPCNTD/VPOPCNTQ? +// FIXME: Is there a better scheduler class for VPOPCNTD/VPOPCNTQ? defm VPOPCNT : avx512_unary_rm_vl_dq<0x55, 0x55, "vpopcnt", ctpop, - SSE_INTALU_ITINS_P, HasVPOPCNTDQ>; + WriteVecALU, HasVPOPCNTDQ>; defm : avx512_unary_lowering<"VPOPCNTQ", ctpop, avx512vl_i64_info, HasVPOPCNTDQ>; defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ>; @@ -9791,54 +9760,54 @@ defm : avx512_unary_lowering<"VPOPCNTD", ctpop, avx512vl_i32_info, HasVPOPCNTDQ> // Replicate Single FP - MOVSHDUP and MOVSLDUP //===---------------------------------------------------------------------===// multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins> { - defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, itins, + X86FoldableSchedWrite sched> { + defm NAME: avx512_unary_rm_vl<opc, OpcodeStr, OpNode, sched, avx512vl_f32_info, HasAVX512>, XS; } -defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, SSE_MOVDDUP>; -defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, SSE_MOVDDUP>; +defm VMOVSHDUP : avx512_replicate<0x16, "vmovshdup", X86Movshdup, WriteFShuffle>; +defm VMOVSLDUP : avx512_replicate<0x12, "vmovsldup", X86Movsldup, WriteFShuffle>; //===----------------------------------------------------------------------===// // AVX-512 - MOVDDUP //===----------------------------------------------------------------------===// multiclass avx512_movddup_128<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> { + X86FoldableSchedWrite sched, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src), OpcodeStr, "$src", "$src", (_.VT (OpNode (_.VT _.RC:$src)))>, EVEX, - Sched<[itins.Sched]>; + Sched<[sched]>; defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.ScalarMemOp:$src), OpcodeStr, "$src", "$src", (_.VT (OpNode (_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src)))))>, EVEX, EVEX_CD8<_.EltSize, CD8VH>, - Sched<[itins.Sched.Folded]>; + Sched<[sched.Folded]>; } } multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, AVX512VLVectorVTInfo VTInfo> { + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTInfo> { - defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, itins, VTInfo.info512>, EVEX_V512; + defm Z : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched, VTInfo.info512>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, itins, VTInfo.info256>, + defm Z256 : avx512_unary_rm<opc, OpcodeStr, X86Movddup, sched, VTInfo.info256>, EVEX_V256; - defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, itins, VTInfo.info128>, + defm Z128 : avx512_movddup_128<opc, OpcodeStr, X86VBroadcast, sched, VTInfo.info128>, EVEX_V128; } } multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins> { - defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, itins, + X86FoldableSchedWrite sched> { + defm NAME: avx512_movddup_common<opc, OpcodeStr, OpNode, sched, avx512vl_f64_info>, XD, VEX_W; } -defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, SSE_MOVDDUP>; +defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup, WriteFShuffle>; let Predicates = [HasVLX] in { def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), @@ -9874,28 +9843,29 @@ def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$sr //===----------------------------------------------------------------------===// // AVX-512 - Unpack Instructions //===----------------------------------------------------------------------===// + defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512, - SSE_ALU_ITINS_S>; + WriteFAdd>; defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512, - SSE_ALU_ITINS_S>; + WriteFAdd>; defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl, - SSE_INTALU_ITINS_P, HasBWI>; + WriteShuffle, HasBWI>; defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh, - SSE_INTALU_ITINS_P, HasBWI>; + WriteShuffle, HasBWI>; defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl, - SSE_INTALU_ITINS_P, HasBWI>; + WriteShuffle, HasBWI>; defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh, - SSE_INTALU_ITINS_P, HasBWI>; + WriteShuffle, HasBWI>; defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl, - SSE_INTALU_ITINS_P, HasAVX512>; + WriteShuffle, HasAVX512>; defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh, - SSE_INTALU_ITINS_P, HasAVX512>; + WriteShuffle, HasAVX512>; defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl, - SSE_INTALU_ITINS_P, HasAVX512>; + WriteShuffle, HasAVX512>; defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh, - SSE_INTALU_ITINS_P, HasAVX512>; + WriteShuffle, HasAVX512>; //===----------------------------------------------------------------------===// // AVX-512 - Extract & Insert Integer Instructions @@ -10022,7 +9992,7 @@ defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W; multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I, AVX512VLVectorVTInfo VTInfo_FP>{ defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp, - SSE_SHUFP>, EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>, + WriteFShuffle>, EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>, AVX512AIi8Base, EVEX_4V; } @@ -10033,51 +10003,46 @@ defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, // AVX-512 - Byte shift Left/Right //===----------------------------------------------------------------------===// -let Sched = WriteVecShift in -def AVX512_BYTESHIFT : OpndItins< - NoItinerary, NoItinerary ->; - multiclass avx512_shift_packed<bits<8> opc, SDNode OpNode, Format MRMr, Format MRMm, string OpcodeStr, - OpndItins itins, X86VectorVTInfo _>{ + X86FoldableSchedWrite sched, X86VectorVTInfo _>{ def rr : AVX512<opc, MRMr, (outs _.RC:$dst), (ins _.RC:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.RC:$dst,(_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>, - Sched<[itins.Sched]>; + Sched<[sched]>; def rm : AVX512<opc, MRMm, (outs _.RC:$dst), (ins _.MemOp:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set _.RC:$dst,(_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))), (i8 imm:$src2))))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr, Format MRMm, string OpcodeStr, - OpndItins itins, Predicate prd>{ + X86FoldableSchedWrite sched, Predicate prd>{ let Predicates = [prd] in defm Z : avx512_shift_packed<opc, OpNode, MRMr, MRMm, - OpcodeStr, itins, v64i8_info>, EVEX_V512; + OpcodeStr, sched, v64i8_info>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, - OpcodeStr, itins, v32i8x_info>, EVEX_V256; + OpcodeStr, sched, v32i8x_info>, EVEX_V256; defm Z128 : avx512_shift_packed<opc, OpNode, MRMr, MRMm, - OpcodeStr, itins, v16i8x_info>, EVEX_V128; + OpcodeStr, sched, v16i8x_info>, EVEX_V128; } } defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq", - AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base, + WriteVecShift, HasBWI>, AVX512PDIi8Base, EVEX_4V, VEX_WIG; defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq", - AVX512_BYTESHIFT, HasBWI>, AVX512PDIi8Base, + WriteVecShift, HasBWI>, AVX512PDIi8Base, EVEX_4V, VEX_WIG; multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode, - string OpcodeStr, OpndItins itins, + string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _dst, X86VectorVTInfo _src> { def rr : AVX512BI<opc, MRMSrcReg, (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.RC:$src2), @@ -10085,7 +10050,7 @@ multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode, [(set _dst.RC:$dst,(_dst.VT (OpNode (_src.VT _src.RC:$src1), (_src.VT _src.RC:$src2))))]>, - Sched<[itins.Sched]>; + Sched<[sched]>; def rm : AVX512BI<opc, MRMSrcMem, (outs _dst.RC:$dst), (ins _src.RC:$src1, _src.MemOp:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), @@ -10093,25 +10058,25 @@ multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode, (OpNode (_src.VT _src.RC:$src1), (_src.VT (bitconvert (_src.LdFrag addr:$src2))))))]>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode, - string OpcodeStr, OpndItins itins, + string OpcodeStr, X86FoldableSchedWrite sched, Predicate prd> { let Predicates = [prd] in - defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v8i64_info, + defm Z : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched, v8i64_info, v64i8_info>, EVEX_V512; let Predicates = [prd, HasVLX] in { - defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v4i64x_info, + defm Z256 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched, v4i64x_info, v32i8x_info>, EVEX_V256; - defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, itins, v2i64x_info, + defm Z128 : avx512_psadbw_packed<opc, OpNode, OpcodeStr, sched, v2i64x_info, v16i8x_info>, EVEX_V128; } } defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", - SSE_MPSADBW_ITINS, HasBWI>, EVEX_4V, VEX_WIG; + WriteMPSAD, HasBWI>, EVEX_4V, VEX_WIG; // Transforms to swizzle an immediate to enable better matching when // memory operand isn't in the right place. @@ -10176,7 +10141,7 @@ def VPTERNLOG312_imm8 : SDNodeXForm<imm, [{ }]>; multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _>{ + X86FoldableSchedWrite sched, X86VectorVTInfo _>{ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, u8imm:$src4), @@ -10185,7 +10150,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.VT _.RC:$src2), (_.VT _.RC:$src3), (i8 imm:$src4)), 1, 1>, - AVX512AIi8Base, EVEX_4V, Sched<[itins.Sched]>; + AVX512AIi8Base, EVEX_4V, Sched<[sched]>; defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4), OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4", @@ -10194,7 +10159,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.VT (bitconvert (_.LdFrag addr:$src3))), (i8 imm:$src4)), 1, 0>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4), OpcodeStr, "$src4, ${src3}"##_.BroadcastStr##", $src2", @@ -10204,7 +10169,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), (i8 imm:$src4)), 1, 0>, EVEX_B, AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; }// Constraints = "$src1 = $dst" // Additional patterns for matching passthru operand in other positions. @@ -10343,19 +10308,19 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode, _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>; } -multiclass avx512_common_ternlog<string OpcodeStr, OpndItins itins, +multiclass avx512_common_ternlog<string OpcodeStr, X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _> { let Predicates = [HasAVX512] in - defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info512>, EVEX_V512; + defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched, _.info512>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info128>, EVEX_V128; - defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, itins, _.info256>, EVEX_V256; + defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched, _.info128>, EVEX_V128; + defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, sched, _.info256>, EVEX_V256; } } -defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SSE_INTALU_ITINS_P, +defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", WriteVecALU, avx512vl_i32_info>; -defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SSE_INTALU_ITINS_P, +defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", WriteVecALU, avx512vl_i64_info>, VEX_W; @@ -10399,7 +10364,7 @@ let Predicates = [HasVLX] in { //===----------------------------------------------------------------------===// multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _>{ + X86FoldableSchedWrite sched, X86VectorVTInfo _>{ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), @@ -10408,7 +10373,7 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.VT _.RC:$src2), (_.IntVT _.RC:$src3), (i32 imm:$src4), - (i32 FROUND_CURRENT))>, Sched<[itins.Sched]>; + (i32 FROUND_CURRENT))>, Sched<[sched]>; defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3, i32u8imm:$src4), OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", @@ -10417,7 +10382,7 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.IntVT (bitconvert (_.LdFrag addr:$src3))), (i32 imm:$src4), (i32 FROUND_CURRENT))>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), OpcodeStr##_.Suffix, "$src4, ${src3}"##_.BroadcastStr##", $src2", @@ -10427,12 +10392,12 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.IntVT (X86VBroadcast(_.ScalarLdFrag addr:$src3))), (i32 imm:$src4), (i32 FROUND_CURRENT))>, - EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } // Constraints = "$src1 = $dst" } multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr, - SDNode OpNode, OpndItins itins, + SDNode OpNode, X86FoldableSchedWrite sched, X86VectorVTInfo _>{ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), @@ -10444,12 +10409,12 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { (_.IntVT _.RC:$src3), (i32 imm:$src4), (i32 FROUND_NO_EXC))>, - EVEX_B, Sched<[itins.Sched]>; + EVEX_B, Sched<[sched]>; } } multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _, + X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo _src3VT> { let Constraints = "$src1 = $dst" , Predicates = [HasAVX512], ExeDomain = _.ExeDomain in { @@ -10460,7 +10425,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, (_.VT _.RC:$src2), (_src3VT.VT _src3VT.RC:$src3), (i32 imm:$src4), - (i32 FROUND_CURRENT))>, Sched<[itins.Sched]>; + (i32 FROUND_CURRENT))>, Sched<[sched]>; defm rrib : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4), OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2", @@ -10470,7 +10435,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, (_src3VT.VT _src3VT.RC:$src3), (i32 imm:$src4), (i32 FROUND_NO_EXC))>, - EVEX_B, Sched<[itins.Sched.Folded, ReadAfterLd]>; + EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; defm rmi : AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3, i32u8imm:$src4), OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4", @@ -10480,33 +10445,33 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, (_src3VT.ScalarLdFrag addr:$src3))), (i32 imm:$src4), (i32 FROUND_CURRENT))>, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } -multiclass avx512_fixupimm_packed_all<OpndItins itins, AVX512VLVectorVTInfo _Vec> { +multiclass avx512_fixupimm_packed_all<X86FoldableSchedWrite sched, AVX512VLVectorVTInfo _Vec> { let Predicates = [HasAVX512] in - defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins, + defm Z : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched, _Vec.info512>, - avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, itins, + avx512_fixupimm_packed_sae<0x54, "vfixupimm", X86VFixupimm, sched, _Vec.info512>, AVX512AIi8Base, EVEX_4V, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins, + defm Z128 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched, _Vec.info128>, AVX512AIi8Base, EVEX_4V, EVEX_V128; - defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, itins, + defm Z256 : avx512_fixupimm_packed<0x54, "vfixupimm", X86VFixupimm, sched, _Vec.info256>, AVX512AIi8Base, EVEX_4V, EVEX_V256; } } defm VFIXUPIMMSS : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar, - SSE_ALU_F32S, f32x_info, v4i32x_info>, + WriteFAdd, f32x_info, v4i32x_info>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>; defm VFIXUPIMMSD : avx512_fixupimm_scalar<0x55, "vfixupimm", X86VFixupimmScalar, - SSE_ALU_F64S, f64x_info, v2i64x_info>, + WriteFAdd, f64x_info, v2i64x_info>, AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W; -defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SSE_ALU_F32P, avx512vl_f32_info>, +defm VFIXUPIMMPS : avx512_fixupimm_packed_all<WriteFAdd, avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; -defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SSE_ALU_F64P, avx512vl_f64_info>, +defm VFIXUPIMMPD : avx512_fixupimm_packed_all<WriteFAdd, avx512vl_f64_info>, EVEX_CD8<64, CD8VF>, VEX_W; @@ -10671,27 +10636,27 @@ defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>; //===----------------------------------------------------------------------===// multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo VTI> { + X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { let Constraints = "$src1 = $dst", ExeDomain = VTI.ExeDomain in { defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, "$src3, $src2", "$src2, $src3", (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>, - AVX512FMA3Base, Sched<[itins.Sched]>; + AVX512FMA3Base, Sched<[sched]>; defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, "$src3, $src2", "$src2, $src3", (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>, AVX512FMA3Base, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } } multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo VTI> - : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI> { + X86FoldableSchedWrite sched, X86VectorVTInfo VTI> + : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI> { let Constraints = "$src1 = $dst", ExeDomain = VTI.ExeDomain in defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), @@ -10701,64 +10666,64 @@ multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode, (OpNode VTI.RC:$src1, VTI.RC:$src2, (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>, AVX512FMA3Base, EVEX_B, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode, - OpndItins itins, AVX512VLVectorVTInfo VTI> { + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTI> { let Predicates = [HasVBMI2] in - defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info512>, EVEX_V512; + defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI.info512>, EVEX_V512; let Predicates = [HasVBMI2, HasVLX] in { - defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info256>, EVEX_V256; - defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info128>, EVEX_V128; + defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI.info256>, EVEX_V256; + defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, sched, VTI.info128>, EVEX_V128; } } multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode, - OpndItins itins, AVX512VLVectorVTInfo VTI> { + X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTI> { let Predicates = [HasVBMI2] in - defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info512>, EVEX_V512; + defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched, VTI.info512>, EVEX_V512; let Predicates = [HasVBMI2, HasVLX] in { - defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info256>, EVEX_V256; - defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info128>, EVEX_V128; + defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched, VTI.info256>, EVEX_V256; + defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, sched, VTI.info128>, EVEX_V128; } } multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix, - SDNode OpNode, OpndItins itins> { - defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, itins, + SDNode OpNode, X86FoldableSchedWrite sched> { + defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, sched, avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; - defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, itins, + defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; - defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, itins, + defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, sched, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; } multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix, - SDNode OpNode, OpndItins itins> { - defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", itins, + SDNode OpNode, X86FoldableSchedWrite sched> { + defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", sched, avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>, VEX_W, EVEX_CD8<16, CD8VF>; defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp, - OpNode, itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; + OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>; defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode, - itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; + sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W; } // Concat & Shift -defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SSE_INTMUL_ITINS_P>; -defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SSE_INTMUL_ITINS_P>; -defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SSE_INTMUL_ITINS_P>; -defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SSE_INTMUL_ITINS_P>; +defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, WriteVecIMul>; +defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, WriteVecIMul>; +defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, WriteVecIMul>; +defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, WriteVecIMul>; // Compress -defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", AVX512_COMPRESS, +defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256, avx512vl_i8_info, HasVBMI2>, EVEX; -defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", AVX512_COMPRESS, +defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256, avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W; // Expand -defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", AVX512_EXPAND, +defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256, avx512vl_i8_info, HasVBMI2>, EVEX; -defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", AVX512_EXPAND, +defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256, avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W; //===----------------------------------------------------------------------===// @@ -10767,13 +10732,13 @@ defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", AVX512_EXPAND, let Constraints = "$src1 = $dst" in multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo VTI> { + X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { defm r : AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst), (ins VTI.RC:$src2, VTI.RC:$src3), OpStr, "$src3, $src2", "$src2, $src3", (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>, - EVEX_4V, T8PD, Sched<[itins.Sched]>; + EVEX_4V, T8PD, Sched<[sched]>; defm m : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr, "$src3, $src2", "$src2, $src3", @@ -10781,7 +10746,7 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode, (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>, EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr, "${src3}"##VTI.BroadcastStr##", $src2", @@ -10790,45 +10755,45 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode, (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>, EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, - T8PD, Sched<[itins.Sched.Folded, ReadAfterLd]>; + T8PD, Sched<[sched.Folded, ReadAfterLd]>; } -multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode, OpndItins itins> { +multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode, X86FoldableSchedWrite sched> { let Predicates = [HasVNNI] in - defm Z : VNNI_rmb<Op, OpStr, OpNode, itins, v16i32_info>, EVEX_V512; + defm Z : VNNI_rmb<Op, OpStr, OpNode, sched, v16i32_info>, EVEX_V512; let Predicates = [HasVNNI, HasVLX] in { - defm Z256 : VNNI_rmb<Op, OpStr, OpNode, itins, v8i32x_info>, EVEX_V256; - defm Z128 : VNNI_rmb<Op, OpStr, OpNode, itins, v4i32x_info>, EVEX_V128; + defm Z256 : VNNI_rmb<Op, OpStr, OpNode, sched, v8i32x_info>, EVEX_V256; + defm Z128 : VNNI_rmb<Op, OpStr, OpNode, sched, v4i32x_info>, EVEX_V128; } } -// FIXME: Is there a better scheduler itinerary for VPDP? -defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, SSE_PMADD>; -defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, SSE_PMADD>; -defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, SSE_PMADD>; -defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, SSE_PMADD>; +// FIXME: Is there a better scheduler class for VPDP? +defm VPDPBUSD : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd, WriteVecIMul>; +defm VPDPBUSDS : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds, WriteVecIMul>; +defm VPDPWSSD : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd, WriteVecIMul>; +defm VPDPWSSDS : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds, WriteVecIMul>; //===----------------------------------------------------------------------===// // Bit Algorithms //===----------------------------------------------------------------------===// -// FIXME: Is there a better scheduler itinerary for VPOPCNTB/VPOPCNTW? -defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SSE_INTALU_ITINS_P, +// FIXME: Is there a better scheduler class for VPOPCNTB/VPOPCNTW? +defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, WriteVecALU, avx512vl_i8_info, HasBITALG>; -defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SSE_INTALU_ITINS_P, +defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, WriteVecALU, avx512vl_i16_info, HasBITALG>, VEX_W; defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>; defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>; -multiclass VPSHUFBITQMB_rm<OpndItins itins, X86VectorVTInfo VTI> { +multiclass VPSHUFBITQMB_rm<X86FoldableSchedWrite sched, X86VectorVTInfo VTI> { defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst), (ins VTI.RC:$src1, VTI.RC:$src2), "vpshufbitqmb", "$src2, $src1", "$src1, $src2", (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD, - Sched<[itins.Sched]>; + Sched<[sched]>; defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst), (ins VTI.RC:$src1, VTI.MemOp:$src2), "vpshufbitqmb", @@ -10836,20 +10801,20 @@ multiclass VPSHUFBITQMB_rm<OpndItins itins, X86VectorVTInfo VTI> { (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1), (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>, EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } -multiclass VPSHUFBITQMB_common<OpndItins itins, AVX512VLVectorVTInfo VTI> { +multiclass VPSHUFBITQMB_common<X86FoldableSchedWrite sched, AVX512VLVectorVTInfo VTI> { let Predicates = [HasBITALG] in - defm Z : VPSHUFBITQMB_rm<itins, VTI.info512>, EVEX_V512; + defm Z : VPSHUFBITQMB_rm<sched, VTI.info512>, EVEX_V512; let Predicates = [HasBITALG, HasVLX] in { - defm Z256 : VPSHUFBITQMB_rm<itins, VTI.info256>, EVEX_V256; - defm Z128 : VPSHUFBITQMB_rm<itins, VTI.info128>, EVEX_V128; + defm Z256 : VPSHUFBITQMB_rm<sched, VTI.info256>, EVEX_V256; + defm Z128 : VPSHUFBITQMB_rm<sched, VTI.info128>, EVEX_V128; } } -// FIXME: Is there a better scheduler itinerary for VPSHUFBITQMB? -defm VPSHUFBITQMB : VPSHUFBITQMB_common<SSE_INTMUL_ITINS_P, avx512vl_i8_info>; +// FIXME: Is there a better scheduler class for VPSHUFBITQMB? +defm VPSHUFBITQMB : VPSHUFBITQMB_common<WriteVecIMul, avx512vl_i8_info>; //===----------------------------------------------------------------------===// // GFNI @@ -10858,12 +10823,12 @@ defm VPSHUFBITQMB : VPSHUFBITQMB_common<SSE_INTMUL_ITINS_P, avx512vl_i8_info>; multiclass GF2P8MULB_avx512_common<bits<8> Op, string OpStr, SDNode OpNode> { let Predicates = [HasGFNI, HasAVX512, HasBWI] in defm Z : avx512_binop_rm<Op, OpStr, OpNode, v64i8_info, - SSE_INTALU_ITINS_P, 1>, EVEX_V512; + WriteVecALU, 1>, EVEX_V512; let Predicates = [HasGFNI, HasVLX, HasBWI] in { defm Z256 : avx512_binop_rm<Op, OpStr, OpNode, v32i8x_info, - SSE_INTALU_ITINS_P, 1>, EVEX_V256; + WriteVecALU, 1>, EVEX_V256; defm Z128 : avx512_binop_rm<Op, OpStr, OpNode, v16i8x_info, - SSE_INTALU_ITINS_P, 1>, EVEX_V128; + WriteVecALU, 1>, EVEX_V128; } } @@ -10871,9 +10836,9 @@ defm VGF2P8MULB : GF2P8MULB_avx512_common<0xCF, "vgf2p8mulb", X86GF2P8mulb>, EVEX_CD8<8, CD8VF>, T8PD; multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo VTI, + X86FoldableSchedWrite sched, X86VectorVTInfo VTI, X86VectorVTInfo BcstVTI> - : avx512_3Op_rm_imm8<Op, OpStr, OpNode, itins, VTI, VTI> { + : avx512_3Op_rm_imm8<Op, OpStr, OpNode, sched, VTI, VTI> { let ExeDomain = VTI.ExeDomain in defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst), (ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3), @@ -10882,26 +10847,26 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode, (OpNode (VTI.VT VTI.RC:$src1), (bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))), (i8 imm:$src3))>, EVEX_B, - Sched<[itins.Sched.Folded, ReadAfterLd]>; + Sched<[sched.Folded, ReadAfterLd]>; } multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode, - OpndItins itins> { + X86FoldableSchedWrite sched> { let Predicates = [HasGFNI, HasAVX512, HasBWI] in - defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v64i8_info, + defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched, v64i8_info, v8i64_info>, EVEX_V512; let Predicates = [HasGFNI, HasVLX, HasBWI] in { - defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v32i8x_info, + defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched, v32i8x_info, v4i64x_info>, EVEX_V256; - defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v16i8x_info, + defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, sched, v16i8x_info, v2i64x_info>, EVEX_V128; } } defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb", - X86GF2P8affineinvqb, SSE_INTMUL_ITINS_P>, + X86GF2P8affineinvqb, WriteVecIMul>, EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb", - X86GF2P8affineqb, SSE_INTMUL_ITINS_P>, + X86GF2P8affineqb, WriteVecIMul>, EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 668a5a9e0e2..558903e9308 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -13,165 +13,6 @@ // //===----------------------------------------------------------------------===// -class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> { - InstrItinClass rr = arg_rr; - InstrItinClass rm = arg_rm; - // InstrSchedModel info. - X86FoldableSchedWrite Sched = WriteFAdd; -} - -class SizeItins<OpndItins arg_s, OpndItins arg_d> { - OpndItins s = arg_s; - OpndItins d = arg_d; -} - -// scalar -let Sched = WriteFAdd in { -def SSE_ALU_F32S : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_ALU_F64S : OpndItins< - NoItinerary, NoItinerary ->; -} - -def SSE_ALU_ITINS_S : SizeItins< - SSE_ALU_F32S, SSE_ALU_F64S ->; - -let Sched = WriteFMul in { -def SSE_MUL_F32S : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_MUL_F64S : OpndItins< - NoItinerary, NoItinerary ->; -} - -def SSE_MUL_ITINS_S : SizeItins< - SSE_MUL_F32S, SSE_MUL_F64S ->; - -let Sched = WriteFDiv in { -def SSE_DIV_F32S : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_DIV_F64S : OpndItins< - NoItinerary, NoItinerary ->; -} - -def SSE_DIV_ITINS_S : SizeItins< - SSE_DIV_F32S, SSE_DIV_F64S ->; - -// parallel -let Sched = WriteFAdd in { -def SSE_ALU_F32P : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_ALU_F64P : OpndItins< - NoItinerary, NoItinerary ->; -} - -def SSE_ALU_ITINS_P : SizeItins< - SSE_ALU_F32P, SSE_ALU_F64P ->; - -let Sched = WriteFMul in { -def SSE_MUL_F32P : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_MUL_F64P : OpndItins< - NoItinerary, NoItinerary ->; -} - -def SSE_MUL_ITINS_P : SizeItins< - SSE_MUL_F32P, SSE_MUL_F64P ->; - -let Sched = WriteFDiv in { -def SSE_DIV_F32P : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_DIV_F64P : OpndItins< - NoItinerary, NoItinerary ->; -} - -def SSE_DIV_ITINS_P : SizeItins< - SSE_DIV_F32P, SSE_DIV_F64P ->; - -let Sched = WriteVecLogic in -def SSE_BIT_ITINS_P : OpndItins< - NoItinerary, NoItinerary ->; - -let Sched = WriteVecALU in { -def SSE_INTALU_ITINS_P : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_INTALUQ_ITINS_P : OpndItins< - NoItinerary, NoItinerary ->; -} - -let Sched = WriteVecIMul in -def SSE_INTMUL_ITINS_P : OpndItins< - NoItinerary, NoItinerary ->; - -let Sched = WriteVecShift in -def SSE_INTSHIFT_P : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_MOVA_ITINS : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_MOVU_ITINS : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_DPPD_ITINS : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_DPPS_ITINS : OpndItins< - NoItinerary, NoItinerary ->; - -let Sched = WriteMPSAD in -def SSE_MPSADBW_ITINS : OpndItins< - NoItinerary, NoItinerary ->; - -let Sched = WritePMULLD in -def SSE_PMULLD_ITINS : OpndItins< - NoItinerary, NoItinerary ->; - -let Sched = WriteShuffle in -def SSE_INTALU_ITINS_SHUFF_P : OpndItins< - NoItinerary, NoItinerary ->; - -let Sched = WriteShuffle in -def SSE_PACK : OpndItins< - NoItinerary, NoItinerary ->; - //===----------------------------------------------------------------------===// // SSE 1 & 2 Instructions Classes //===----------------------------------------------------------------------===// @@ -1046,73 +887,6 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { // SSE 1 & 2 - Conversion Instructions //===----------------------------------------------------------------------===// -let Sched = WriteCvtF2I in { -def SSE_CVT_SS2SI_32 : OpndItins< - NoItinerary, NoItinerary ->; - -let Sched = WriteCvtF2I in -def SSE_CVT_SS2SI_64 : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_CVT_SD2SI : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_CVT_PS2I : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_CVT_PD2I : OpndItins< - NoItinerary, NoItinerary ->; -} - -let Sched = WriteCvtI2F in { -def SSE_CVT_SI2SS : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_CVT_SI2SD : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_CVT_I2PS : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_CVT_I2PD : OpndItins< - NoItinerary, NoItinerary ->; -} - -let Sched = WriteCvtF2F in { -def SSE_CVT_SD2SS : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_CVT_SS2SD : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_CVT_PD2PS : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_CVT_PS2PD : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_CVT_PH2PS : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_CVT_PS2PH : OpndItins< - NoItinerary, NoItinerary ->; -} - // FIXME: We probably want to match the rm form only when optimizing for // size, to avoid false depenendecies (see sse_fp_unop_s for details) multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, @@ -2048,11 +1822,6 @@ let Predicates = [UseSSE2] in { // SSE 1 & 2 - Compare Instructions //===----------------------------------------------------------------------===// -let Sched = WriteFAdd in -def SSE_COMIS : OpndItins< - NoItinerary, NoItinerary ->; - // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, Operand CC, SDNode OpNode, ValueType VT, @@ -2339,11 +2108,6 @@ let Predicates = [UseSSE1] in { // SSE 1 & 2 - Shuffle Instructions //===----------------------------------------------------------------------===// -let Sched = WriteFShuffle in -def SSE_SHUFP : OpndItins< - NoItinerary, NoItinerary ->; - /// sse12_shuffle - sse 1 & 2 fp shuffle instructions multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, ValueType vt, string asm, PatFrag mem_frag, @@ -2387,11 +2151,6 @@ let Constraints = "$src1 = $dst" in { // SSE 1 & 2 - Unpack FP Instructions //===----------------------------------------------------------------------===// -let Sched = WriteFShuffle in -def SSE_UNPCK : OpndItins< - NoItinerary, NoItinerary ->; - /// sse12_unpack_interleave - sse 1 & 2 fp unpack and interleave multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt, PatFrag mem_frag, RegisterClass RC, @@ -2953,7 +2712,6 @@ defm : scalar_math_f64_patterns<fsub, "SUB">; defm : scalar_math_f64_patterns<fmul, "MUL">; defm : scalar_math_f64_patterns<fdiv, "DIV">; - /// Unop Arithmetic /// In addition, we also have a special variant of the scalar form here to /// represent the associated intrinsic operation. This form is unlike the @@ -2962,60 +2720,6 @@ defm : scalar_math_f64_patterns<fdiv, "DIV">; /// /// And, we have a special variant form for a full-vector intrinsic form. -let Sched = WriteFSqrt in { -def SSE_SQRTPS : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_SQRTSS : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_SQRTPD : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_SQRTSD : OpndItins< - NoItinerary, NoItinerary ->; -} - -let Sched = WriteFRsqrt in { -def SSE_RSQRTPS : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_RSQRTSS : OpndItins< - NoItinerary, NoItinerary ->; -} - -def SSE_RSQRT_P : SizeItins< - SSE_RSQRTPS, SSE_RSQRTPS ->; - -def SSE_RSQRT_S : SizeItins< - SSE_RSQRTSS, SSE_RSQRTSS ->; - -let Sched = WriteFRcp in { -def SSE_RCPP : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_RCPS : OpndItins< - NoItinerary, NoItinerary ->; -} - -def SSE_RCP_P : SizeItins< - SSE_RCPP, SSE_RCPP ->; - -def SSE_RCP_S : SizeItins< - SSE_RCPS, SSE_RCPS ->; - /// sse_fp_unop_s - SSE1 unops in scalar form /// For the non-AVX defs, we need $src1 to be tied to $dst because /// the HW instructions are 2 operand / destructive. @@ -3573,11 +3277,6 @@ let Predicates = [HasAVX, NoVLX] in { // SSE2 - Packed Integer Arithmetic Instructions //===---------------------------------------------------------------------===// -let Sched = WriteVecIMul in -def SSE_PMADD : OpndItins< - NoItinerary, NoItinerary ->; - let ExeDomain = SSEPackedInt in { // SSE integer instructions /// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types @@ -3802,11 +3501,6 @@ defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, // SSE2 - Packed Integer Shuffle Instructions //===---------------------------------------------------------------------===// -let Sched = WriteShuffle in -def SSE_PSHUF : OpndItins< - NoItinerary, NoItinerary ->; - let ExeDomain = SSEPackedInt in { multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256, SDNode OpNode, X86FoldableSchedWrite sched, @@ -4614,12 +4308,6 @@ let Predicates = [UseSSE3] in { // SSE3 - Replicate Double FP - MOVDDUP //===---------------------------------------------------------------------===// -// FIXME: Improve MOVDDUP/BROADCAST reg/mem scheduling itineraries. -let Sched = WriteFShuffle in -def SSE_MOVDDUP : OpndItins< - NoItinerary, NoItinerary ->; - multiclass sse3_replicate_dfp<string OpcodeStr> { def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), @@ -4819,11 +4507,6 @@ let Constraints = "$src1 = $dst" in { // SSSE3 - Packed Absolute Instructions //===---------------------------------------------------------------------===// -let Sched = WriteVecALU in -def SSE_PABS : OpndItins< - NoItinerary, NoItinerary ->; - /// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. multiclass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt, SDNode OpNode, X86FoldableSchedWrite sched, PatFrag ld_frag> { @@ -4881,30 +4564,6 @@ defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, WriteVecALU, memopv2i64>; // SSSE3 - Packed Binary Operator Instructions //===---------------------------------------------------------------------===// -let Sched = WritePHAdd in { -def SSE_PHADDSUBD : OpndItins< - NoItinerary, NoItinerary ->; -def SSE_PHADDSUBSW : OpndItins< - NoItinerary, NoItinerary ->; -def SSE_PHADDSUBW : OpndItins< - NoItinerary, NoItinerary ->; -} -let Sched = WriteVarShuffle in -def SSE_PSHUFB : OpndItins< - NoItinerary, NoItinerary ->; -let Sched = WriteVecALU in -def SSE_PSIGN : OpndItins< - NoItinerary, NoItinerary ->; -let Sched = WriteVecIMul in -def SSE_PMULHRSW : OpndItins< - NoItinerary, NoItinerary ->; - /// SS3I_binop_rm - Simple SSSE3 bin op multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, ValueType DstVT, ValueType OpVT, RegisterClass RC, @@ -5095,11 +4754,6 @@ defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16, // SSSE3 - Packed Align Instruction Patterns //===---------------------------------------------------------------------===// -let Sched = WriteShuffle in -def SSE_PALIGN : OpndItins< - NoItinerary, NoItinerary ->; - multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC, PatFrag memop_frag, X86MemOperand x86memop, X86FoldableSchedWrite sched, bit Is2Addr = 1> { @@ -5688,14 +5342,6 @@ let Predicates = [UseAVX] in { // SSE4.1 - Round Instructions //===----------------------------------------------------------------------===// -def SSE_ROUNDPS : OpndItins< - NoItinerary, NoItinerary ->; - -def SSE_ROUNDPD : OpndItins< - NoItinerary, NoItinerary ->; - multiclass sse41_fp_unop_p<bits<8> opc, string OpcodeStr, X86MemOperand x86memop, RegisterClass RC, ValueType VT, PatFrag mem_frag, SDNode OpNode, @@ -5992,11 +5638,6 @@ let Predicates = [UseSSE41] in { // SSE4.1 - Packed Bit Test //===----------------------------------------------------------------------===// -let Sched = WriteVecLogic in -def SSE_PTEST : OpndItins< - NoItinerary, NoItinerary ->; - // ptest instruction we'll lower to this in X86ISelLowering primarily from // the intel intrinsic that corresponds to this. let Defs = [EFLAGS], Predicates = [HasAVX] in { @@ -6948,7 +6589,7 @@ let Constraints = "$src1 = $dst" in { // SHA-NI Instructions //===----------------------------------------------------------------------===// -// FIXME: Is there a better scheduler itinerary for SHA than WriteVecIMul? +// FIXME: Is there a better scheduler class for SHA than WriteVecIMul? multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId, X86FoldableSchedWrite sched, bit UsesXMM0 = 0> { def rr : I<Opc, MRMSrcReg, (outs VR128:$dst), @@ -7516,16 +7157,6 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", // VPERMIL - Permute Single and Double Floating-Point Values // -let Sched = WriteFVarShuffle in -def AVX_VPERMILV : OpndItins< - NoItinerary, NoItinerary ->; - -let Sched = WriteFShuffle in -def AVX_VPERMIL : OpndItins< - NoItinerary, NoItinerary ->; - multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop_f, X86MemOperand x86memop_i, PatFrag i_frag, @@ -7980,16 +7611,6 @@ let Predicates = [HasAVX1Only] in { // VPERM - Permute instructions // -let Sched = WriteFShuffle256 in -def AVX2_PERMV_F : OpndItins< - NoItinerary, NoItinerary ->; - -let Sched = WriteShuffle256 in -def AVX2_PERMV_I : OpndItins< - NoItinerary, NoItinerary ->; - multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, ValueType OpVT, X86FoldableSchedWrite Sched, X86MemOperand memOp> { @@ -8413,12 +8034,12 @@ multiclass GF2P8MULB_rm<string OpcodeStr, ValueType OpVT, let isCommutable = 1 in def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "", [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))]>, - Sched<[SSE_INTALU_ITINS_P.Sched]>, T8PD; + Sched<[WriteVecALU]>, T8PD; def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "", [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, (bitconvert (MemOpFrag addr:$src2)))))]>, - Sched<[SSE_INTALU_ITINS_P.Sched.Folded, ReadAfterLd]>, T8PD; + Sched<[WriteVecALU.Folded, ReadAfterLd]>, T8PD; } } |

