diff options
| | | |
|---|---|---|
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-05-02 12:27:54 +0000 |
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-05-02 12:27:54 +0000 |
| commit | e8671ef434e2eddc0931a0c1135df55b13204cdc (patch) | |
| tree | 0ba307e7f78edc9ba460b068092e5ba3f2725980 /llvm/lib | |
| parent | 7a36495bf74a411d52c6dcf5b4db459fec21f89e (diff) | |
| download | bcm5719-llvm-e8671ef434e2eddc0931a0c1135df55b13204cdc.tar.gz bcm5719-llvm-e8671ef434e2eddc0931a0c1135df55b13204cdc.zip | |
[X86] Convert most remaining uses of X86SchedWritePair scheduler classes to X86SchedWriteWidths.
We've dealt with the majority already.
llvm-svn: 331347
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 407 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 9 |
2 files changed, 222 insertions, 194 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 550076dda73..6583a66dd00 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -3365,27 +3365,28 @@ defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64, let Predicates = [HasAVX, NoVLX_Or_NoBWI] in defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128, - loadv2i64, i128mem, WriteVecIMul, 0>, VEX_4V, VEX_WIG; + loadv2i64, i128mem, SchedWriteVecIMul.XMM, 0>, + VEX_4V, VEX_WIG; let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16, - VR256, loadv4i64, i256mem, WriteVecIMul, + VR256, loadv4i64, i256mem, SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst" in defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128, - memopv2i64, i128mem, WriteVecIMul>; + memopv2i64, i128mem, SchedWriteVecIMul.XMM>; let Predicates = [HasAVX, NoVLX_Or_NoBWI] in defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128, - loadv2i64, i128mem, WritePSADBW, 0>, + loadv2i64, i128mem, SchedWritePSADBW.XMM, 0>, VEX_4V, VEX_WIG; let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256, - loadv4i64, i256mem, WritePSADBW, 0>, + loadv4i64, i256mem, SchedWritePSADBW.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst" in defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128, - memopv2i64, i128mem, WritePSADBW>; + memopv2i64, i128mem, SchedWritePSADBW.XMM>; //===---------------------------------------------------------------------===// // SSE2 - Packed Integer Logical Instructions @@ -3426,18 +3427,18 @@ multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm, string OpcodeStr, SDNode OpNode, SDNode OpNode2, ValueType DstVT128, ValueType DstVT256, ValueType SrcVT, - 
X86FoldableSchedWrite sched, Predicate prd> { + X86SchedWriteWidths sched, Predicate prd> { let Predicates = [HasAVX, prd] in defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr), - OpNode, OpNode2, VR128, sched, DstVT128, + OpNode, OpNode2, VR128, sched.XMM, DstVT128, SrcVT, loadv2i64, 0>, VEX_4V, VEX_WIG; let Predicates = [HasAVX2, prd] in defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr), - OpNode, OpNode2, VR256, sched, DstVT256, + OpNode, OpNode2, VR256, sched.YMM, DstVT256, SrcVT, loadv2i64, 0>, VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst" in defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2, - VR128, sched, DstVT128, SrcVT, memopv2i64>; + VR128, sched.XMM, DstVT128, SrcVT, memopv2i64>; } multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr, @@ -3467,26 +3468,26 @@ let Constraints = "$src1 = $dst" in let ExeDomain = SSEPackedInt in { defm PSLLW : PDI_binop_rmi_all<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli, - v8i16, v16i16, v8i16, WriteVecShift, + v8i16, v16i16, v8i16, SchedWriteVecShift, NoVLX_Or_NoBWI>; defm PSLLD : PDI_binop_rmi_all<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli, - v4i32, v8i32, v4i32, WriteVecShift, NoVLX>; + v4i32, v8i32, v4i32, SchedWriteVecShift, NoVLX>; defm PSLLQ : PDI_binop_rmi_all<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli, - v2i64, v4i64, v2i64, WriteVecShift, NoVLX>; + v2i64, v4i64, v2i64, SchedWriteVecShift, NoVLX>; defm PSRLW : PDI_binop_rmi_all<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli, - v8i16, v16i16, v8i16, WriteVecShift, + v8i16, v16i16, v8i16, SchedWriteVecShift, NoVLX_Or_NoBWI>; defm PSRLD : PDI_binop_rmi_all<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli, - v4i32, v8i32, v4i32, WriteVecShift, NoVLX>; + v4i32, v8i32, v4i32, SchedWriteVecShift, NoVLX>; defm PSRLQ : PDI_binop_rmi_all<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli, - v2i64, v4i64, v2i64, WriteVecShift, NoVLX>; + v2i64, v4i64, v2i64, 
SchedWriteVecShift, NoVLX>; defm PSRAW : PDI_binop_rmi_all<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai, - v8i16, v16i16, v8i16, WriteVecShift, + v8i16, v16i16, v8i16, SchedWriteVecShift, NoVLX_Or_NoBWI>; defm PSRAD : PDI_binop_rmi_all<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, - v4i32, v8i32, v4i32, WriteVecShift, NoVLX>; + v4i32, v8i32, v4i32, SchedWriteVecShift, NoVLX>; defm PSLLDQ : PDI_binop_ri_all<0x73, MRM7r, "pslldq", X86vshldq, SchedWriteShuffle>; @@ -3517,7 +3518,7 @@ defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, let ExeDomain = SSEPackedInt in { multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256, - SDNode OpNode, X86FoldableSchedWrite sched, + SDNode OpNode, X86SchedWriteWidths sched, Predicate prd> { let Predicates = [HasAVX, prd] in { def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), @@ -3526,7 +3527,7 @@ let Predicates = [HasAVX, prd] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>, - VEX, Sched<[sched]>, VEX_WIG; + VEX, Sched<[sched.XMM]>, VEX_WIG; def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), !strconcat("v", OpcodeStr, @@ -3534,7 +3535,7 @@ let Predicates = [HasAVX, prd] in { [(set VR128:$dst, (vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)), (i8 imm:$src2))))]>, VEX, - Sched<[sched.Folded]>, VEX_WIG; + Sched<[sched.XMM.Folded]>, VEX_WIG; } let Predicates = [HasAVX2, prd] in { @@ -3544,7 +3545,7 @@ let Predicates = [HasAVX2, prd] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))]>, - VEX, VEX_L, Sched<[sched]>, VEX_WIG; + VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG; def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src1, u8imm:$src2), !strconcat("v", OpcodeStr, @@ -3552,7 +3553,7 @@ let Predicates = [HasAVX2, prd] in { [(set VR256:$dst, (vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)), (i8 
imm:$src2))))]>, VEX, VEX_L, - Sched<[sched.Folded]>, VEX_WIG; + Sched<[sched.YMM.Folded]>, VEX_WIG; } let Predicates = [UseSSE2] in { @@ -3562,7 +3563,7 @@ let Predicates = [UseSSE2] in { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))]>, - Sched<[sched]>; + Sched<[sched.XMM]>; def mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), !strconcat(OpcodeStr, @@ -3570,17 +3571,17 @@ let Predicates = [UseSSE2] in { [(set VR128:$dst, (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)), (i8 imm:$src2))))]>, - Sched<[sched.Folded]>; + Sched<[sched.XMM.Folded]>; } } } // ExeDomain = SSEPackedInt -defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd, WriteShuffle, - NoVLX>, PD; -defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw, WriteShuffle, - NoVLX_Or_NoBWI>, XS; -defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw, WriteShuffle, - NoVLX_Or_NoBWI>, XD; +defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd, + SchedWriteShuffle, NoVLX>, PD; +defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw, + SchedWriteShuffle, NoVLX_Or_NoBWI>, XS; +defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw, + SchedWriteShuffle, NoVLX_Or_NoBWI>, XD; //===---------------------------------------------------------------------===// // Packed Integer Pack Instructions (SSE & AVX) @@ -3639,43 +3640,47 @@ multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, VR128, - i128mem, WriteShuffle, loadv2i64, 0>, VEX_4V, VEX_WIG; + i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>, + VEX_4V, VEX_WIG; defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, VR128, - i128mem, WriteShuffle, loadv2i64, 0>, VEX_4V, VEX_WIG; + i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>, + VEX_4V, VEX_WIG; defm VPACKUSWB : 
sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, VR128, - i128mem, WriteShuffle, loadv2i64, 0>, VEX_4V, VEX_WIG; + i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>, + VEX_4V, VEX_WIG; defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128, - i128mem, WriteShuffle, loadv2i64, 0>, VEX_4V; + i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>, + VEX_4V; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss, - VR256, i256mem, WriteShuffle, loadv4i64, 0>, + defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss, VR256, + i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>, VEX_4V, VEX_L, VEX_WIG; - defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, - VR256, i256mem, WriteShuffle, loadv4i64, 0>, + defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, VR256, + i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>, VEX_4V, VEX_L, VEX_WIG; - defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus, - VR256,i256mem, WriteShuffle, loadv4i64, 0>, + defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus, VR256, + i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>, VEX_4V, VEX_L, VEX_WIG; - defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, - VR256, i256mem, WriteShuffle, loadv4i64, 0>, + defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256, + i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>, VEX_4V, VEX_L; } let Constraints = "$src1 = $dst" in { defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss, VR128, - i128mem, WriteShuffle, memopv2i64>; + i128mem, SchedWriteShuffle.XMM, memopv2i64>; defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss, VR128, - i128mem, WriteShuffle, memopv2i64>; + i128mem, SchedWriteShuffle.XMM, memopv2i64>; defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus, VR128, - i128mem, WriteShuffle, memopv2i64>; + 
i128mem, SchedWriteShuffle.XMM, memopv2i64>; defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus, VR128, - i128mem, WriteShuffle, memopv2i64>; + i128mem, SchedWriteShuffle.XMM, memopv2i64>; } } // ExeDomain = SSEPackedInt @@ -3707,78 +3712,82 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128, - i128mem, WriteShuffle, loadv2i64, 0>, VEX_4V, VEX_WIG; + i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>, + VEX_4V, VEX_WIG; defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128, - i128mem, WriteShuffle, loadv2i64, 0>, VEX_4V, VEX_WIG; + i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>, + VEX_4V, VEX_WIG; defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128, - i128mem, WriteShuffle, loadv2i64, 0>, VEX_4V, VEX_WIG; + i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>, + VEX_4V, VEX_WIG; defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128, - i128mem, WriteShuffle, loadv2i64, 0>, VEX_4V, VEX_WIG; + i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>, + VEX_4V, VEX_WIG; } let Predicates = [HasAVX, NoVLX] in { defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128, - i128mem, WriteShuffle, loadv2i64, 0>, + i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128, - i128mem, WriteShuffle, loadv2i64, 0>, + i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128, - i128mem, WriteShuffle, loadv2i64, 0>, + i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128, - i128mem, WriteShuffle, loadv2i64, 0>, + i128mem, SchedWriteShuffle.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { defm 
VPUNPCKLBWY : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256, - i256mem, WriteShuffle, loadv4i64, 0>, + i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPUNPCKLWDY : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256, - i256mem, WriteShuffle, loadv4i64, 0>, + i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPUNPCKHBWY : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256, - i256mem, WriteShuffle, loadv4i64, 0>, + i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPUNPCKHWDY : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256, - i256mem, WriteShuffle, loadv4i64, 0>, + i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>, VEX_4V, VEX_L, VEX_WIG; } let Predicates = [HasAVX2, NoVLX] in { defm VPUNPCKLDQY : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256, - i256mem, WriteShuffle, loadv4i64, 0>, + i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256, - i256mem, WriteShuffle, loadv4i64, 0>, + i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPUNPCKHDQY : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256, - i256mem, WriteShuffle, loadv4i64, 0>, + i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256, - i256mem, WriteShuffle, loadv4i64, 0>, + i256mem, SchedWriteShuffle.YMM, loadv4i64, 0>, VEX_4V, VEX_L, VEX_WIG; } let Constraints = "$src1 = $dst" in { defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl, VR128, - i128mem, WriteShuffle, memopv2i64>; + i128mem, SchedWriteShuffle.XMM, memopv2i64>; defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl, VR128, - i128mem, WriteShuffle, memopv2i64>; + i128mem, SchedWriteShuffle.XMM, memopv2i64>; defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl, VR128, - i128mem, 
WriteShuffle, memopv2i64>; + i128mem, SchedWriteShuffle.XMM, memopv2i64>; defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl, VR128, - i128mem, WriteShuffle, memopv2i64>; + i128mem, SchedWriteShuffle.XMM, memopv2i64>; defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh, VR128, - i128mem, WriteShuffle, memopv2i64>; + i128mem, SchedWriteShuffle.XMM, memopv2i64>; defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh, VR128, - i128mem, WriteShuffle, memopv2i64>; + i128mem, SchedWriteShuffle.XMM, memopv2i64>; defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh, VR128, - i128mem, WriteShuffle, memopv2i64>; + i128mem, SchedWriteShuffle.XMM, memopv2i64>; defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh, VR128, - i128mem, WriteShuffle, memopv2i64>; + i128mem, SchedWriteShuffle.XMM, memopv2i64>; } } // ExeDomain = SSEPackedInt @@ -4529,56 +4538,65 @@ let Constraints = "$src1 = $dst" in { /// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. multiclass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt, - SDNode OpNode, X86FoldableSchedWrite sched, PatFrag ld_frag> { + SDNode OpNode, X86SchedWriteWidths sched, PatFrag ld_frag> { def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (vt (OpNode VR128:$src)))]>, - Sched<[sched]>; + Sched<[sched.XMM]>; def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (vt (OpNode (bitconvert (ld_frag addr:$src)))))]>, - Sched<[sched.Folded]>; + Sched<[sched.XMM.Folded]>; } /// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. 
multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt, - SDNode OpNode, X86FoldableSchedWrite sched> { + SDNode OpNode, X86SchedWriteWidths sched> { def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (vt (OpNode VR256:$src)))]>, - Sched<[sched]>; + Sched<[sched.YMM]>; def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (vt (OpNode (bitconvert (loadv4i64 addr:$src)))))]>, - Sched<[sched.Folded]>; + Sched<[sched.YMM.Folded]>; } let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { - defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, WriteVecALU, loadv2i64>, VEX, VEX_WIG; - defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, WriteVecALU, loadv2i64>, VEX, VEX_WIG; + defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SchedWriteVecALU, + loadv2i64>, VEX, VEX_WIG; + defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SchedWriteVecALU, + loadv2i64>, VEX, VEX_WIG; } let Predicates = [HasAVX, NoVLX] in { - defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, WriteVecALU, loadv2i64>, VEX, VEX_WIG; + defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SchedWriteVecALU, + loadv2i64>, VEX, VEX_WIG; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { - defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, WriteVecALU>, VEX, VEX_L, VEX_WIG; - defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, WriteVecALU>, VEX, VEX_L, VEX_WIG; + defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SchedWriteVecALU>, + VEX, VEX_L, VEX_WIG; + defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, SchedWriteVecALU>, + VEX, VEX_L, VEX_WIG; } let Predicates = [HasAVX2, NoVLX] in { - defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, WriteVecALU>, VEX, VEX_L, VEX_WIG; + defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, SchedWriteVecALU>, + VEX, VEX_L, VEX_WIG; } -defm PABSB 
: SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, WriteVecALU, memopv2i64>; -defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, WriteVecALU, memopv2i64>; -defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, WriteVecALU, memopv2i64>; +defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SchedWriteVecALU, + memopv2i64>; +defm PABSW : SS3I_unop_rm<0x1D, "pabsw", v8i16, abs, SchedWriteVecALU, + memopv2i64>; +defm PABSD : SS3I_unop_rm<0x1E, "pabsd", v4i32, abs, SchedWriteVecALU, + memopv2i64>; //===---------------------------------------------------------------------===// // SSSE3 - Packed Binary Operator Instructions @@ -4652,14 +4670,14 @@ let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in { let isCommutable = 0 in { defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8, VR128, loadv2i64, i128mem, - WriteVarShuffle, 0>, VEX_4V, VEX_WIG; + SchedWriteVarShuffle.XMM, 0>, VEX_4V, VEX_WIG; defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16, v16i8, VR128, loadv2i64, i128mem, - WriteVecIMul, 0>, VEX_4V, VEX_WIG; + SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG; } defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16, VR128, loadv2i64, i128mem, - WriteVecIMul, 0>, VEX_4V, VEX_WIG; + SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG; } let ImmT = NoImm, Predicates = [HasAVX] in { @@ -4678,13 +4696,13 @@ let isCommutable = 0 in { WritePHAdd, 0>, VEX_4V; defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", int_x86_ssse3_psign_b_128, - WriteVecALU, loadv2i64, 0>, VEX_4V, VEX_WIG; + SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", int_x86_ssse3_psign_w_128, - WriteVecALU, loadv2i64, 0>, VEX_4V, VEX_WIG; + SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", int_x86_ssse3_psign_d_128, - WriteVecALU, loadv2i64, 0>, VEX_4V, VEX_WIG; + SchedWriteVecALU.XMM, loadv2i64, 0>, VEX_4V, VEX_WIG; defm VPHADDSW : SS3I_binop_rm_int<0x03, 
"vphaddsw", int_x86_ssse3_phadd_sw_128, WritePHAdd, loadv2i64, 0>, VEX_4V, VEX_WIG; @@ -4698,14 +4716,14 @@ let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { let isCommutable = 0 in { defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8, VR256, loadv4i64, i256mem, - WriteVarShuffle, 0>, VEX_4V, VEX_L, VEX_WIG; + SchedWriteVarShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16, v32i8, VR256, loadv4i64, i256mem, - WriteVecIMul, 0>, VEX_4V, VEX_L, VEX_WIG; + SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; } defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16, VR256, loadv4i64, i256mem, - WriteVecIMul, 0>, VEX_4V, VEX_L, VEX_WIG; + SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; } let ImmT = NoImm, Predicates = [HasAVX2] in { @@ -4723,11 +4741,11 @@ let isCommutable = 0 in { loadv4i64, i256mem, WritePHAdd, 0>, VEX_4V, VEX_L; defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b, - WriteVecALU>, VEX_4V, VEX_L, VEX_WIG; + SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG; defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w, - WriteVecALU>, VEX_4V, VEX_L, VEX_WIG; + SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG; defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d, - WriteVecALU>, VEX_4V, VEX_L, VEX_WIG; + SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG; defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", int_x86_avx2_phadd_sw, WritePHAdd>, VEX_4V, VEX_L, VEX_WIG; @@ -4749,13 +4767,13 @@ let isCommutable = 0 in { defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128, memopv2i64, i128mem, WritePHAdd>; defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128, - WriteVecALU, memopv2i64>; + SchedWriteVecALU.XMM, memopv2i64>; defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", int_x86_ssse3_psign_w_128, - WriteVecALU, memopv2i64>; + SchedWriteVecALU.XMM, memopv2i64>; 
defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128, - WriteVecALU, memopv2i64>; + SchedWriteVecALU.XMM, memopv2i64>; defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8, VR128, - memopv2i64, i128mem, WriteVarShuffle>; + memopv2i64, i128mem, SchedWriteVarShuffle.XMM>; defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", int_x86_ssse3_phadd_sw_128, WritePHAdd, memopv2i64>; @@ -4764,10 +4782,10 @@ let isCommutable = 0 in { WritePHAdd, memopv2i64>; defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16, v16i8, VR128, memopv2i64, i128mem, - WriteVecIMul>; + SchedWriteVecIMul.XMM>; } defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16, - VR128, memopv2i64, i128mem, WriteVecIMul>; + VR128, memopv2i64, i128mem, SchedWriteVecIMul.XMM>; } //===---------------------------------------------------------------------===// @@ -4801,14 +4819,14 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC, } let Predicates = [HasAVX, NoVLX_Or_NoBWI] in - defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, loadv2i64, - i128mem, WriteShuffle, 0>, VEX_4V, VEX_WIG; + defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, loadv2i64, i128mem, + SchedWriteShuffle.XMM, 0>, VEX_4V, VEX_WIG; let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in - defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, loadv4i64, - i256mem, WriteShuffle, 0>, VEX_4V, VEX_L, VEX_WIG; + defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, loadv4i64, i256mem, + SchedWriteShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in - defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memopv2i64, - i128mem, WriteShuffle>; + defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memopv2i64, i128mem, + SchedWriteShuffle.XMM>; //===---------------------------------------------------------------------===// // SSSE3 - Thread synchronization @@ -4853,26 +4871,26 @@ multiclass 
SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, Sched<[sched.Folded]>; } - +// FIXME: YMM cases should use SchedWriteShuffle.YMM. multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr, X86MemOperand MemOp, X86MemOperand MemYOp, - X86FoldableSchedWrite sched, Predicate prd> { - defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, sched>; + X86SchedWriteWidths sched, Predicate prd> { + defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, sched.XMM>; let Predicates = [HasAVX, prd] in defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp, - VR128, VR128, sched>, VEX, VEX_WIG; + VR128, VR128, sched.XMM>, VEX, VEX_WIG; let Predicates = [HasAVX2, prd] in defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp, - VR256, VR128, sched>, VEX, VEX_L, VEX_WIG; + VR256, VR128, sched.XMM>, VEX, VEX_L, VEX_WIG; } multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp, X86MemOperand MemYOp, Predicate prd> { defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr), - MemOp, MemYOp, WriteShuffle, prd>; + MemOp, MemYOp, SchedWriteShuffle, prd>; defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10), !strconcat("pmovzx", OpcodeStr), - MemOp, MemYOp, WriteShuffle, prd>; + MemOp, MemYOp, SchedWriteShuffle, prd>; } defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>; @@ -5734,7 +5752,7 @@ let Defs = [EFLAGS], Predicates = [HasPOPCNT] in { "popcnt{w}\t{$src, $dst|$dst, $src}", [(set GR16:$dst, (ctpop (loadi16 addr:$src))), (implicit EFLAGS)]>, - Sched<[WritePOPCNTLd]>, OpSize16, XS; + Sched<[WritePOPCNT.Folded]>, OpSize16, XS; def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), "popcnt{l}\t{$src, $dst|$dst, $src}", @@ -5745,7 +5763,7 @@ let Defs = [EFLAGS], Predicates = [HasPOPCNT] in { "popcnt{l}\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (ctpop (loadi32 addr:$src))), 
(implicit EFLAGS)]>, - Sched<[WritePOPCNTLd]>, OpSize32, XS; + Sched<[WritePOPCNT.Folded]>, OpSize32, XS; def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), "popcnt{q}\t{$src, $dst|$dst, $src}", @@ -5755,7 +5773,7 @@ let Defs = [EFLAGS], Predicates = [HasPOPCNT] in { "popcnt{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (ctpop (loadi64 addr:$src))), (implicit EFLAGS)]>, - Sched<[WritePOPCNTLd]>, XS; + Sched<[WritePOPCNT.Folded]>, XS; } // SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. @@ -5810,112 +5828,112 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasAVX, NoVLX] in { defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128, - loadv2i64, i128mem, WriteVecALU, 0>, + loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>, VEX_4V, VEX_WIG; defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128, - loadv2i64, i128mem, WriteVecALU, 0>, + loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>, VEX_4V, VEX_WIG; defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128, - loadv2i64, i128mem, WriteVecALU, 0>, + loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>, VEX_4V, VEX_WIG; defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128, - loadv2i64, i128mem, WriteVecALU, 0>, + loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>, VEX_4V, VEX_WIG; defm VPMULDQ : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128, - loadv2i64, i128mem, WriteVecIMul, 0>, + loadv2i64, i128mem, SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG; } let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128, - loadv2i64, i128mem, WriteVecALU, 0>, + loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>, VEX_4V, VEX_WIG; defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128, - loadv2i64, i128mem, WriteVecALU, 0>, + loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>, VEX_4V, VEX_WIG; defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128, - 
loadv2i64, i128mem, WriteVecALU, 0>, + loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>, VEX_4V, VEX_WIG; defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128, - loadv2i64, i128mem, WriteVecALU, 0>, + loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>, VEX_4V, VEX_WIG; } let Predicates = [HasAVX2, NoVLX] in { defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256, - loadv4i64, i256mem, WriteVecALU, 0>, + loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256, - loadv4i64, i256mem, WriteVecALU, 0>, + loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256, - loadv4i64, i256mem, WriteVecALU, 0>, + loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256, - loadv4i64, i256mem, WriteVecALU, 0>, + loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPMULDQY : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256, - loadv4i64, i256mem, WriteVecIMul, 0>, + loadv4i64, i256mem, SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; } let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256, - loadv4i64, i256mem, WriteVecALU, 0>, + loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256, - loadv4i64, i256mem, WriteVecALU, 0>, + loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256, - loadv4i64, i256mem, WriteVecALU, 0>, + loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256, - loadv4i64, i256mem, WriteVecALU, 0>, + loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; } let 
Constraints = "$src1 = $dst" in { defm PMINSB : SS48I_binop_rm<0x38, "pminsb", smin, v16i8, VR128, - memopv2i64, i128mem, WriteVecALU, 1>; + memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>; defm PMINSD : SS48I_binop_rm<0x39, "pminsd", smin, v4i32, VR128, - memopv2i64, i128mem, WriteVecALU, 1>; + memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>; defm PMINUD : SS48I_binop_rm<0x3B, "pminud", umin, v4i32, VR128, - memopv2i64, i128mem, WriteVecALU, 1>; + memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>; defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", umin, v8i16, VR128, - memopv2i64, i128mem, WriteVecALU, 1>; + memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>; defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", smax, v16i8, VR128, - memopv2i64, i128mem, WriteVecALU, 1>; + memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>; defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", smax, v4i32, VR128, - memopv2i64, i128mem, WriteVecALU, 1>; + memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>; defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", umax, v4i32, VR128, - memopv2i64, i128mem, WriteVecALU, 1>; + memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>; defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", umax, v8i16, VR128, - memopv2i64, i128mem, WriteVecALU, 1>; + memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>; defm PMULDQ : SS48I_binop_rm<0x28, "pmuldq", X86pmuldq, v2i64, VR128, - memopv2i64, i128mem, WriteVecIMul, 1>; + memopv2i64, i128mem, SchedWriteVecIMul.XMM, 1>; } let Predicates = [HasAVX, NoVLX] in defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128, - loadv2i64, i128mem, WritePMULLD, 0>, + loadv2i64, i128mem, SchedWritePMULLD.XMM, 0>, VEX_4V, VEX_WIG; let Predicates = [HasAVX] in defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128, - loadv2i64, i128mem, WriteVecALU, 0>, + loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>, VEX_4V, VEX_WIG; let Predicates = [HasAVX2, NoVLX] in defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256, - loadv4i64, i256mem, WritePMULLD, 0>, + loadv4i64, 
i256mem, SchedWritePMULLD.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; let Predicates = [HasAVX2] in defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256, - loadv4i64, i256mem, WriteVecALU, 0>, + loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst" in { defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128, - memopv2i64, i128mem, WritePMULLD, 1>; + memopv2i64, i128mem, SchedWritePMULLD.XMM, 1>; defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128, - memopv2i64, i128mem, WriteVecALU, 1>; + memopv2i64, i128mem, SchedWriteVecALU.XMM, 1>; } /// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate @@ -5993,7 +6011,7 @@ let Predicates = [HasAVX] in { let isCommutable = 0 in { defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, VR128, loadv2i64, i128mem, 0, - WriteMPSAD>, VEX_4V, VEX_WIG; + SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG; } let ExeDomain = SSEPackedSingle in @@ -6014,14 +6032,15 @@ let Predicates = [HasAVX2] in { let isCommutable = 0 in { defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw, VR256, loadv4i64, i256mem, 0, - WriteMPSAD>, VEX_4V, VEX_L, VEX_WIG; + SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, VEX_WIG; } } let Constraints = "$src1 = $dst" in { let isCommutable = 0 in { defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw, - VR128, memopv2i64, i128mem, 1, WriteMPSAD>; + VR128, memopv2i64, i128mem, 1, + SchedWriteMPSAD.XMM>; } let ExeDomain = SSEPackedSingle in @@ -6395,17 +6414,17 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasAVX] in defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128, - loadv2i64, i128mem, WriteVecALU, 0>, + loadv2i64, i128mem, SchedWriteVecALU.XMM, 0>, VEX_4V, VEX_WIG; let Predicates = [HasAVX2] in defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256, - loadv4i64, i256mem, WriteVecALU, 
0>, + loadv4i64, i256mem, SchedWriteVecALU.YMM, 0>, VEX_4V, VEX_L, VEX_WIG; let Constraints = "$src1 = $dst" in defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128, - memopv2i64, i128mem, WriteVecALU>; + memopv2i64, i128mem, SchedWriteVecALU.XMM>; //===----------------------------------------------------------------------===// // SSE4.2 - String/text Processing Instructions @@ -6420,7 +6439,7 @@ multiclass pcmpistrm_SS42AI<string asm> { def rm :SS42AI<0x62, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, u8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, Sched<[WritePCmpIStrMLd, ReadAfterLd]>; + []>, Sched<[WritePCmpIStrM.Folded, ReadAfterLd]>; } let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in { @@ -6438,7 +6457,7 @@ multiclass SS42AI_pcmpestrm<string asm> { def rm : SS42AI<0x60, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, u8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, Sched<[WritePCmpEStrMLd, ReadAfterLd]>; + []>, Sched<[WritePCmpEStrM.Folded, ReadAfterLd]>; } let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { @@ -6456,7 +6475,7 @@ multiclass SS42AI_pcmpistri<string asm> { def rm : SS42AI<0x63, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, u8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), - []>, Sched<[WritePCmpIStrILd, ReadAfterLd]>; + []>, Sched<[WritePCmpIStrI.Folded, ReadAfterLd]>; } let Defs = [ECX, EFLAGS], hasSideEffects = 0 in { @@ -6474,7 +6493,7 @@ multiclass SS42AI_pcmpestri<string asm> { def rm : SS42AI<0x61, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, u8imm:$src5), !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), - []>, Sched<[WritePCmpEStrILd, ReadAfterLd]>; + []>, Sched<[WritePCmpEStrI.Folded, ReadAfterLd]>; } let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { @@ -6504,7 +6523,7 @@ class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut, 
SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2), !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"), [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))]>, - Sched<[WriteCRC32Ld, ReadAfterLd]>; + Sched<[WriteCRC32.Folded, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem, @@ -6569,7 +6588,7 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { [(set VR128:$dst, (int_x86_sha1rnds4 VR128:$src1, VR128:$src2, (i8 imm:$src3)))]>, TA, - Sched<[WriteVecIMul]>; + Sched<[SchedWriteVecIMul.XMM]>; def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, u8imm:$src3), "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", @@ -6577,23 +6596,23 @@ let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { (int_x86_sha1rnds4 VR128:$src1, (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$src3)))]>, TA, - Sched<[WriteVecIMul.Folded, ReadAfterLd]>; + Sched<[SchedWriteVecIMul.XMM.Folded, ReadAfterLd]>; defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", int_x86_sha1nexte, - WriteVecIMul>; + SchedWriteVecIMul.XMM>; defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1, - WriteVecIMul>; + SchedWriteVecIMul.XMM>; defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2, - WriteVecIMul>; + SchedWriteVecIMul.XMM>; let Uses=[XMM0] in defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, - WriteVecIMul, 1>; + SchedWriteVecIMul.XMM, 1>; defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1, - WriteVecIMul>; + SchedWriteVecIMul.XMM>; defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2, - WriteVecIMul>; + SchedWriteVecIMul.XMM>; } // Aliases with explicit %xmm0 @@ -6620,7 +6639,7 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, def rm : AES8I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, MemOp:$src2), "", [(set RC:$dst, (IntId RC:$src1, (ld_frag addr:$src2)))]>, - Sched<[WriteAESDecEncLd, 
ReadAfterLd]>; + Sched<[WriteAESDecEnc.Folded, ReadAfterLd]>; } } @@ -6674,7 +6693,7 @@ let Predicates = [HasAVX, HasAES] in { (ins i128mem:$src1), "vaesimc\t{$src1, $dst|$dst, $src1}", [(set VR128:$dst, (int_x86_aesni_aesimc (loadv2i64 addr:$src1)))]>, - Sched<[WriteAESIMCLd]>, VEX, VEX_WIG; + Sched<[WriteAESIMC.Folded]>, VEX, VEX_WIG; } def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1), @@ -6685,7 +6704,7 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1), "aesimc\t{$src1, $dst|$dst, $src1}", [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>, - Sched<[WriteAESIMCLd]>; + Sched<[WriteAESIMC.Folded]>; // AES Round Key Generation Assist let Predicates = [HasAVX, HasAES] in { @@ -6700,7 +6719,7 @@ let Predicates = [HasAVX, HasAES] in { "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>, - Sched<[WriteAESKeyGenLd]>, VEX, VEX_WIG; + Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG; } def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), @@ -6713,7 +6732,7 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>, - Sched<[WriteAESKeyGenLd]>; + Sched<[WriteAESKeyGen.Folded]>; //===----------------------------------------------------------------------===// // PCLMUL Instructions @@ -6742,7 +6761,7 @@ let Predicates = [NoAVX, HasPCLMUL] in { [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, (memopv2i64 addr:$src2), imm:$src3))]>, - Sched<[WriteCLMulLd, ReadAfterLd]>; + Sched<[WriteCLMul.Folded, ReadAfterLd]>; } // Constraints = "$src1 = $dst" def : Pat<(int_x86_pclmulqdq (memopv2i64 addr:$src2), VR128:$src1, @@ -6778,7 +6797,7 @@ multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp, 
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set RC:$dst, (IntId RC:$src1, (LdFrag addr:$src2), imm:$src3))]>, - Sched<[WriteCLMulLd, ReadAfterLd]>; + Sched<[WriteCLMul.Folded, ReadAfterLd]>; // We can commute a load in the first operand by swapping the sources and // rotating the immediate. @@ -6830,26 +6849,26 @@ def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst), "extrq\t{$idx, $len, $src|$src, $len, $idx}", [(set VR128:$dst, (X86extrqi VR128:$src, imm:$len, imm:$idx))]>, - PD, Sched<[WriteVecALU]>; + PD, Sched<[SchedWriteVecALU.XMM]>; def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), "extrq\t{$mask, $src|$src, $mask}", [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src, VR128:$mask))]>, - PD, Sched<[WriteVecALU]>; + PD, Sched<[SchedWriteVecALU.XMM]>; def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx), "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}", [(set VR128:$dst, (X86insertqi VR128:$src, VR128:$src2, imm:$len, imm:$idx))]>, - XD, Sched<[WriteVecALU]>; + XD, Sched<[SchedWriteVecALU.XMM]>; def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src, VR128:$mask), "insertq\t{$mask, $src|$src, $mask}", [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src, VR128:$mask))]>, - XD, Sched<[WriteVecALU]>; + XD, Sched<[SchedWriteVecALU.XMM]>; } } // ExeDomain = SSEPackedInt @@ -7387,12 +7406,12 @@ multiclass avx2_broadcast<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>, - Sched<[WriteShuffle]>, VEX; + Sched<[SchedWriteShuffle.XMM]>, VEX; def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpVT128 (X86VBroadcast (ld_frag addr:$src))))]>, - Sched<[WriteShuffleLd]>, VEX; + Sched<[SchedWriteShuffle.XMM.Folded]>, VEX; def Yrr : AVX28I<opc, MRMSrcReg, 
(outs VR256:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, @@ -7402,7 +7421,7 @@ multiclass avx2_broadcast<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR256:$dst, (OpVT256 (X86VBroadcast (ld_frag addr:$src))))]>, - Sched<[WriteShuffleLd]>, VEX, VEX_L; + Sched<[SchedWriteShuffle.XMM.Folded]>, VEX, VEX_L; // Provide aliases for broadcast from the same register class that // automatically does the extract. @@ -7817,27 +7836,27 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>, - VEX_4V, Sched<[WriteVarVecShift]>; + VEX_4V, Sched<[SchedWriteVarVecShift.XMM]>; def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (vt128 (OpNode VR128:$src1, (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>, - VEX_4V, Sched<[WriteVarVecShiftLd, ReadAfterLd]>; + VEX_4V, Sched<[SchedWriteVarVecShift.XMM.Folded, ReadAfterLd]>; def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>, - VEX_4V, VEX_L, Sched<[WriteVarVecShift]>; + VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM]>; def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (vt256 (OpNode VR256:$src1, (vt256 (bitconvert (loadv4i64 addr:$src2))))))]>, - VEX_4V, VEX_L, Sched<[WriteVarVecShiftLd, ReadAfterLd]>; + VEX_4V, VEX_L, Sched<[SchedWriteVarVecShift.YMM.Folded, ReadAfterLd]>; } let Predicates = [HasAVX2, NoVLX] in { @@ -7989,12 +8008,12 @@ multiclass GF2P8MULB_rm<string OpcodeStr, 
ValueType OpVT, let isCommutable = 1 in def rr : PDI<0xCF, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), "", [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, RC:$src2)))]>, - Sched<[WriteVecALU]>, T8PD; + Sched<[SchedWriteVecALU.XMM]>, T8PD; def rm : PDI<0xCF, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2), "", [(set RC:$dst, (OpVT (X86GF2P8mulb RC:$src1, (bitconvert (MemOpFrag addr:$src2)))))]>, - Sched<[WriteVecALU.Folded, ReadAfterLd]>, T8PD; + Sched<[SchedWriteVecALU.XMM.Folded, ReadAfterLd]>, T8PD; } } @@ -8007,13 +8026,13 @@ multiclass GF2P8AFFINE_rmi<bits<8> Op, string OpStr, ValueType OpVT, def rri : Ii8<Op, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$src3), "", [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))], - SSEPackedInt>, Sched<[WriteVecALU]>; + SSEPackedInt>, Sched<[SchedWriteVecALU.XMM]>; def rmi : Ii8<Op, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, X86MemOp:$src2, u8imm:$src3), "", [(set RC:$dst, (OpVT (OpNode RC:$src1, (bitconvert (MemOpFrag addr:$src2)), imm:$src3)))], SSEPackedInt>, - Sched<[WriteVecALU.Folded, ReadAfterLd]>; + Sched<[SchedWriteVecALU.XMM.Folded, ReadAfterLd]>; } } diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 7b024d2715e..25c14f5c08a 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -244,12 +244,21 @@ def SchedWriteVecLogic def SchedWriteVecShift : X86SchedWriteWidths<WriteVecShift, WriteVecShift, WriteVecShift, WriteVecShift>; +def SchedWriteVarVecShift + : X86SchedWriteWidths<WriteVarVecShift, WriteVarVecShift, + WriteVarVecShift, WriteVarVecShift>; def SchedWriteVecIMul : X86SchedWriteWidths<WriteVecIMul, WriteVecIMul, WriteVecIMul, WriteVecIMul>; def SchedWritePMULLD : X86SchedWriteWidths<WritePMULLD, WritePMULLD, WritePMULLD, WritePMULLD>; +def SchedWriteMPSAD + : X86SchedWriteWidths<WriteMPSAD, WriteMPSAD, + WriteMPSAD, WriteMPSAD>; +def SchedWritePSADBW + : X86SchedWriteWidths<WritePSADBW, 
WritePSADBW, + WritePSADBW, WritePSADBW>; def SchedWriteShuffle : X86SchedWriteWidths<WriteShuffle, WriteShuffle, |

