diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-05-14 18:37:19 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-05-14 18:37:19 +0000 |
commit | 215ce4a1cadd10d2533efe601c0e1e665df6bba4 (patch) | |
tree | 1f1c9e731b88b3075cceb9899fc005dbcb963221 | |
parent | 62f7af712c0043e8bdb83e6c91bde43a11aab210 (diff) | |
download | bcm5719-llvm-215ce4a1cadd10d2533efe601c0e1e665df6bba4.tar.gz bcm5719-llvm-215ce4a1cadd10d2533efe601c0e1e665df6bba4.zip |
[X86] Add NT load/store scheduler classes
llvm-svn: 332274
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrMMX.td | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 22 | ||||
-rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 16 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 18 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 16 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 16 | ||||
-rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 16 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 34 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleAtom.td | 14 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 36 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 16 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 20 |
13 files changed, 148 insertions, 84 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index cb995af87f1..d8991fa0e9f 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4373,11 +4373,11 @@ multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr, } defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info, - SchedWriteVecMoveLS>, PD; + SchedWriteVecMoveLSNT>, PD; defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info, - SchedWriteFMoveLS>, PD, VEX_W; + SchedWriteFMoveLSNT>, PD, VEX_W; defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info, - SchedWriteFMoveLS>, PS; + SchedWriteFMoveLSNT>, PS; let Predicates = [HasAVX512], AddedComplexity = 400 in { def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst), diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td index 57a5bee2b26..c716bbc1141 100644 --- a/llvm/lib/Target/X86/X86InstrMMX.td +++ b/llvm/lib/Target/X86/X86InstrMMX.td @@ -272,7 +272,7 @@ let Predicates = [HasSSE1] in def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src), "movntq\t{$src, $dst|$dst, $src}", [(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>, - Sched<[SchedWriteVecMoveLS.MMX.MR]>; + Sched<[SchedWriteVecMoveLSNT.MMX.MR]>; let Predicates = [HasMMX] in { let AddedComplexity = 15 in diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 0731bdb9d97..cc615e4438d 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -3017,7 +3017,7 @@ defm : scalar_unary_math_patterns<int_x86_sse2_sqrt_sd, "SQRTSD", X86Movsd, let AddedComplexity = 400 in { // Prefer non-temporal versions let Predicates = [HasAVX, NoVLX] in { -let SchedRW = [SchedWriteFMoveLS.XMM.MR] in { +let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in { def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", @@ -3030,7 +3030,7 @@ def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), addr:$dst)]>, VEX, VEX_WIG; } // SchedRW -let SchedRW = [SchedWriteFMoveLS.YMM.MR] in { +let SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in { def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movntps\t{$src, $dst|$dst, $src}", @@ -3049,17 +3049,17 @@ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), "movntdq\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>, VEX, VEX_WIG, - Sched<[SchedWriteVecMoveLS.XMM.MR]>; + Sched<[SchedWriteVecMoveLSNT.XMM.MR]>; def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), "movntdq\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4i64 VR256:$src), addr:$dst)]>, VEX, VEX_L, VEX_WIG, - Sched<[SchedWriteVecMoveLS.YMM.MR]>; + Sched<[SchedWriteVecMoveLSNT.YMM.MR]>; } // ExeDomain } // Predicates -let SchedRW = [SchedWriteFMoveLS.XMM.MR] in { +let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in { def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntps\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; @@ -3068,12 +3068,12 @@ def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>; } // SchedRW -let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in +let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLSNT.XMM.MR] in def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntdq\t{$src, $dst|$dst, $src}", [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>; -let SchedRW = [WriteStore] in { +let SchedRW = [WriteStoreNT] in { // There is no AVX form for instructions below this point def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), "movnti{l}\t{$src, $dst|$dst, $src}", @@ -6409,14 +6409,14 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions let Predicates = [HasAVX, NoVLX] in def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", []>, - Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG; + Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, VEX_WIG; let Predicates = [HasAVX2, NoVLX] in def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", []>, - Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG; + Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, VEX_WIG; def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", []>, - Sched<[SchedWriteVecMoveLS.XMM.RM]>; + Sched<[SchedWriteVecMoveLSNT.XMM.RM]>; let Predicates = [HasAVX2, NoVLX] in { def : Pat<(v8f32 (alignednontemporalload addr:$src)), @@ -6935,7 +6935,7 @@ def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), // Non-temporal (unaligned) scalar stores. let AddedComplexity = 400 in { // Prefer non-temporal versions -let hasSideEffects = 0, mayStore = 1, SchedRW = [WriteStore] in { +let hasSideEffects = 0, mayStore = 1, SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in { def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src), "movntss\t{$src, $dst|$dst, $src}", []>, XS; diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 7efc3a28481..3af49f62428 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -147,9 +147,10 @@ defm : BWWriteResPair<WriteBEXTR, [BWPort06,BWPort15], 2, [1,1], 2>; defm : BWWriteResPair<WriteBZHI, [BWPort15], 1>; // Loads, stores, and moves, not folded with other operations. -def : WriteRes<WriteLoad, [BWPort23]> { let Latency = 5; } -def : WriteRes<WriteStore, [BWPort237, BWPort4]>; -def : WriteRes<WriteMove, [BWPort0156]>; +defm : X86WriteRes<WriteLoad, [BWPort23], 5, [1], 1>; +defm : X86WriteRes<WriteStore, [BWPort237, BWPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteStoreNT, [BWPort237, BWPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteMove, [BWPort0156], 1, [1,1], 1>; // Idioms that clear a register, like xorps %xmm0, %xmm0. // These can often bypass execution ports completely. @@ -171,6 +172,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>; defm : X86WriteRes<WriteFStore, [BWPort237,BWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreX, [BWPort237,BWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreY, [BWPort237,BWPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteFStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteFStoreNTX, [BWPort237,BWPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteFStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>; defm : X86WriteRes<WriteFMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>; defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>; @@ -266,11 +270,15 @@ def : WriteRes<WriteCvtF2FSt, [BWPort1,BWPort4,BWPort237]> { defm : X86WriteRes<WriteVecLoad, [BWPort23], 5, [1], 1>; defm : X86WriteRes<WriteVecLoadX, [BWPort23], 5, [1], 1>; defm : X86WriteRes<WriteVecLoadY, [BWPort23], 6, [1], 1>; +defm : X86WriteRes<WriteVecLoadNT, [BWPort23], 5, [1], 1>; +defm : X86WriteRes<WriteVecLoadNTY, [BWPort23], 6, [1], 1>; defm : X86WriteRes<WriteVecMaskedLoad, [BWPort23,BWPort5], 7, [1,2], 3>; defm : X86WriteRes<WriteVecMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>; defm : X86WriteRes<WriteVecStore, [BWPort237,BWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteVecStoreX, [BWPort237,BWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteVecStoreY, [BWPort237,BWPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteVecStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteVecStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteVecMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>; defm : X86WriteRes<WriteVecMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>; defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>; @@ -578,8 +586,6 @@ def BWWriteResGroup10 : SchedWriteRes<[BWPort4,BWPort237]> { } def: InstRW<[BWWriteResGroup10], (instregex "FBSTPm", "MMX_MOVD64mr", - "MOVNTI_64mr", - "MOVNTImr", "ST_FP(32|64|80)m", "(V?)MOV(H|L)(PD|PS)mr", "(V?)MOVPDI2DImr", diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 0a64f2a37fe..7eba2469eb9 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -107,10 +107,11 @@ def : WriteRes<WriteRMW, [HWPort237,HWPort4]>; // Store_addr on 237. // Store_data on 4. -def : WriteRes<WriteStore, [HWPort237, HWPort4]>; -def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 5; } -def : WriteRes<WriteMove, [HWPort0156]>; -def : WriteRes<WriteZero, []>; +defm : X86WriteRes<WriteStore, [HWPort237, HWPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteStoreNT, [HWPort237, HWPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteLoad, [HWPort23], 5, [1], 1>; +defm : X86WriteRes<WriteMove, [HWPort0156], 1, [1], 1>; +def : WriteRes<WriteZero, []>; defm : HWWriteResPair<WriteALU, [HWPort0156], 1>; defm : HWWriteResPair<WriteIMul, [HWPort1], 3>; @@ -161,6 +162,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>; defm : X86WriteRes<WriteFStore, [HWPort237,HWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreX, [HWPort237,HWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreY, [HWPort237,HWPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteFStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteFStoreNTX, [HWPort237,HWPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteFStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>; defm : X86WriteRes<WriteFMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>; defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>; @@ -257,11 +261,15 @@ def : WriteRes<WriteCvtF2FSt, [HWPort1,HWPort4,HWPort5,HWPort237]> { defm : X86WriteRes<WriteVecLoad, [HWPort23], 5, [1], 1>; defm : X86WriteRes<WriteVecLoadX, [HWPort23], 6, [1], 1>; defm : X86WriteRes<WriteVecLoadY, [HWPort23], 7, [1], 1>; +defm : X86WriteRes<WriteVecLoadNT, [HWPort23], 6, [1], 1>; +defm : X86WriteRes<WriteVecLoadNTY, [HWPort23], 7, [1], 1>; defm : X86WriteRes<WriteVecMaskedLoad, [HWPort23,HWPort5], 8, [1,2], 3>; defm : X86WriteRes<WriteVecMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>; defm : X86WriteRes<WriteVecStore, [HWPort237,HWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteVecStoreX, [HWPort237,HWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteVecStoreY, [HWPort237,HWPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteVecStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteVecStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteVecMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>; defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>; defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>; @@ -754,8 +762,6 @@ def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> { } def: InstRW<[HWWriteResGroup1], (instregex "FBSTPm", "MMX_MOVD64mr", - "MOVNTI_64mr", - "MOVNTImr", "ST_FP(32|64|80)m", "(V?)MOV(H|L)(PD|PS)mr", "(V?)MOVPDI2DImr", diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index be380ffa2ff..953d0c266fe 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -96,10 +96,11 @@ multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW, // 2/3 cycle to recompute the address. def : WriteRes<WriteRMW, [SBPort23,SBPort4]>; -def : WriteRes<WriteStore, [SBPort23, SBPort4]>; -def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; } -def : WriteRes<WriteMove, [SBPort015]>; -def : WriteRes<WriteZero, []>; +def : WriteRes<WriteStore, [SBPort23, SBPort4]>; +def : WriteRes<WriteStoreNT, [SBPort23, SBPort4]>; +def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; } +def : WriteRes<WriteMove, [SBPort015]>; +def : WriteRes<WriteZero, []>; defm : SBWriteResPair<WriteALU, [SBPort015], 1>; defm : SBWriteResPair<WriteIMul, [SBPort1], 3>; @@ -153,6 +154,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>; defm : X86WriteRes<WriteFStore, [SBPort23,SBPort4], 1, [1,1], 1>; defm : X86WriteRes<WriteFStoreX, [SBPort23,SBPort4], 1, [1,1], 1>; defm : X86WriteRes<WriteFStoreY, [SBPort23,SBPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteFStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteFStoreNTX, [SBPort23,SBPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteFStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>; defm : X86WriteRes<WriteFMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>; defm : X86WriteRes<WriteFMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>; defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>; @@ -237,11 +241,15 @@ def : WriteRes<WriteCvtF2FSt, [SBPort1, SBPort23, SBPort4]> { let Latency = 4; defm : X86WriteRes<WriteVecLoad, [SBPort23], 5, [1], 1>; defm : X86WriteRes<WriteVecLoadX, [SBPort23], 6, [1], 1>; defm : X86WriteRes<WriteVecLoadY, [SBPort23], 7, [1], 1>; +defm : X86WriteRes<WriteVecLoadNT, [SBPort23], 6, [1], 1>; +defm : X86WriteRes<WriteVecLoadNTY, [SBPort23], 7, [1], 1>; defm : X86WriteRes<WriteVecMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>; defm : X86WriteRes<WriteVecMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>; defm : X86WriteRes<WriteVecStore, [SBPort23,SBPort4], 1, [1,1], 1>; defm : X86WriteRes<WriteVecStoreX, [SBPort23,SBPort4], 1, [1,1], 1>; defm : X86WriteRes<WriteVecStoreY, [SBPort23,SBPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteVecStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteVecStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>; defm : X86WriteRes<WriteVecMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>; defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>; defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index e5019938dbd..a986d883c0a 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -145,9 +145,10 @@ defm : SKLWriteResPair<WriteBEXTR, [SKLPort06,SKLPort15], 2, [1,1], 2>; defm : SKLWriteResPair<WriteBZHI, [SKLPort15], 1>; // Loads, stores, and moves, not folded with other operations. -def : WriteRes<WriteLoad, [SKLPort23]> { let Latency = 5; } -def : WriteRes<WriteStore, [SKLPort237, SKLPort4]>; -def : WriteRes<WriteMove, [SKLPort0156]>; +defm : X86WriteRes<WriteLoad, [SKLPort23], 5, [1], 1>; +defm : X86WriteRes<WriteStore, [SKLPort237, SKLPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteStoreNT, [SKLPort237, SKLPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteMove, [SKLPort0156], 1, [1], 1>; // Idioms that clear a register, like xorps %xmm0, %xmm0. // These can often bypass execution ports completely. @@ -166,6 +167,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>; defm : X86WriteRes<WriteFStore, [SKLPort237,SKLPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreX, [SKLPort237,SKLPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteFStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteFStoreNTX, [SKLPort237,SKLPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteFStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>; defm : X86WriteRes<WriteFMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>; defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>; @@ -258,11 +262,15 @@ def : WriteRes<WriteCvtF2FSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01]> { defm : X86WriteRes<WriteVecLoad, [SKLPort23], 5, [1], 1>; defm : X86WriteRes<WriteVecLoadX, [SKLPort23], 6, [1], 1>; defm : X86WriteRes<WriteVecLoadY, [SKLPort23], 7, [1], 1>; +defm : X86WriteRes<WriteVecLoadNT, [SKLPort23], 6, [1], 1>; +defm : X86WriteRes<WriteVecLoadNTY, [SKLPort23], 7, [1], 1>; defm : X86WriteRes<WriteVecMaskedLoad, [SKLPort23,SKLPort015], 7, [1,1], 2>; defm : X86WriteRes<WriteVecMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>; defm : X86WriteRes<WriteVecStore, [SKLPort237,SKLPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteVecStoreX, [SKLPort237,SKLPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteVecStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteVecStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteVecStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteVecMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>; defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>; defm : X86WriteRes<WriteVecMove, [SKLPort015], 1, [1], 1>; @@ -580,8 +588,6 @@ def SKLWriteResGroup11 : SchedWriteRes<[SKLPort4,SKLPort237]> { } def: InstRW<[SKLWriteResGroup11], (instregex "FBSTPm", "MMX_MOVD64mr", - "MOVNTI_64mr", - "MOVNTImr", "ST_FP(32|64|80)m", "(V?)MOV(H|L)(PD|PS)mr", "(V?)MOVPDI2DImr", diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 4bd95ea494d..01bbb035d46 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -145,9 +145,10 @@ defm : SKXWriteResPair<WriteBEXTR, [SKXPort06,SKXPort15], 2, [1,1], 2>; defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>; // Loads, stores, and moves, not folded with other operations. -def : WriteRes<WriteLoad, [SKXPort23]> { let Latency = 5; } -def : WriteRes<WriteStore, [SKXPort237, SKXPort4]>; -def : WriteRes<WriteMove, [SKXPort0156]>; +defm : X86WriteRes<WriteLoad, [SKXPort23], 5, [1], 1>; +defm : X86WriteRes<WriteStore, [SKXPort237, SKXPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteStoreNT, [SKXPort237, SKXPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteMove, [SKXPort0156], 1, [1], 1>; // Idioms that clear a register, like xorps %xmm0, %xmm0. // These can often bypass execution ports completely. @@ -166,6 +167,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>; defm : X86WriteRes<WriteFStore, [SKXPort237,SKXPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteFStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteFStoreNTX, [SKXPort237,SKXPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteFStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>; defm : X86WriteRes<WriteFMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>; defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>; @@ -258,11 +262,15 @@ def : WriteRes<WriteCvtF2FSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015]> { defm : X86WriteRes<WriteVecLoad, [SKXPort23], 5, [1], 1>; defm : X86WriteRes<WriteVecLoadX, [SKXPort23], 6, [1], 1>; defm : X86WriteRes<WriteVecLoadY, [SKXPort23], 7, [1], 1>; +defm : X86WriteRes<WriteVecLoadNT, [SKXPort23], 6, [1], 1>; +defm : X86WriteRes<WriteVecLoadNTY, [SKXPort23], 7, [1], 1>; defm : X86WriteRes<WriteVecMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>; defm : X86WriteRes<WriteVecMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>; defm : X86WriteRes<WriteVecStore, [SKXPort237,SKXPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteVecStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteVecStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteVecStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>; +defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteVecMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>; defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>; defm : X86WriteRes<WriteVecMove, [SKXPort015], 1, [1], 1>; @@ -605,8 +613,6 @@ def SKXWriteResGroup11 : SchedWriteRes<[SKXPort4,SKXPort237]> { def: InstRW<[SKXWriteResGroup11], (instregex "FBSTPm", "KMOV(B|D|Q|W)mk", "MMX_MOVD64mr", - "MOVNTI_64mr", - "MOVNTImr", "ST_FP(32|64|80)m", "VMOV(H|L)(PD|PS)Z128mr(b?)", "(V?)MOV(H|L)(PD|PS)mr", diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index a0a9a911788..6bbf456e588 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -90,9 +90,10 @@ class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl, } // Loads, stores, and moves, not folded with other operations. -def WriteLoad : SchedWrite; -def WriteStore : SchedWrite; -def WriteMove : SchedWrite; +def WriteLoad : SchedWrite; +def WriteStore : SchedWrite; +def WriteStoreNT : SchedWrite; +def WriteMove : SchedWrite; // Arithmetic. defm WriteALU : X86SchedWritePair; // Simple integer ALU op. @@ -145,6 +146,9 @@ def WriteFMaskedLoadY : SchedWrite; def WriteFStore : SchedWrite; def WriteFStoreX : SchedWrite; def WriteFStoreY : SchedWrite; +def WriteFStoreNT : SchedWrite; +def WriteFStoreNTX : SchedWrite; +def WriteFStoreNTY : SchedWrite; def WriteFMaskedStore : SchedWrite; def WriteFMaskedStoreY : SchedWrite; def WriteFMove : SchedWrite; @@ -229,11 +233,15 @@ defm WritePHAddY : X86SchedWritePair; // YMM/ZMM. def WriteVecLoad : SchedWrite; def WriteVecLoadX : SchedWrite; def WriteVecLoadY : SchedWrite; +def WriteVecLoadNT : SchedWrite; +def WriteVecLoadNTY : SchedWrite; def WriteVecMaskedLoad : SchedWrite; def WriteVecMaskedLoadY : SchedWrite; def WriteVecStore : SchedWrite; def WriteVecStoreX : SchedWrite; def WriteVecStoreY : SchedWrite; +def WriteVecStoreNT : SchedWrite; +def WriteVecStoreNTY : SchedWrite; def WriteVecMaskedStore : SchedWrite; def WriteVecMaskedStoreY : SchedWrite; def WriteVecMove : SchedWrite; @@ -352,6 +360,16 @@ def SchedWriteFMoveLS : X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX, WriteFMoveLSY, WriteFMoveLSY>; +def WriteFMoveLSNT + : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>; +def WriteFMoveLSNTX + : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNTX>; +def WriteFMoveLSNTY + : X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreNTY>; +def SchedWriteFMoveLSNT + : X86SchedWriteMoveLSWidths<WriteFMoveLSNT, WriteFMoveLSNTX, + WriteFMoveLSNTY, WriteFMoveLSNTY>; + def WriteVecMoveLS : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>; def WriteVecMoveLSX @@ -362,6 +380,16 @@ def SchedWriteVecMoveLS : X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX, WriteVecMoveLSY, WriteVecMoveLSY>; +def WriteVecMoveLSNT + : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>; +def WriteVecMoveLSNTX + : X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadNT, WriteVecStoreNT>; +def WriteVecMoveLSNTY + : X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadNTY, WriteVecStoreNTY>; +def SchedWriteVecMoveLSNT + : X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX, + WriteVecMoveLSNTY, WriteVecMoveLSNTY>; + // Vector width wrappers. def SchedWriteFAdd : X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddY>; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index b69a6281fd4..32e84a169ea 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -144,9 +144,10 @@ defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>; // Loads, stores, and moves, not folded with other operations. //////////////////////////////////////////////////////////////////////////////// -def : WriteRes<WriteLoad, [AtomPort0]>; -def : WriteRes<WriteStore, [AtomPort0]>; -def : WriteRes<WriteMove, [AtomPort01]>; +def : WriteRes<WriteLoad, [AtomPort0]>; +def : WriteRes<WriteStore, [AtomPort0]>; +def : WriteRes<WriteStoreNT, [AtomPort0]>; +def : WriteRes<WriteMove, [AtomPort01]>; // Treat misc copies as a move. def : InstRW<[WriteMove], (instrs COPY)>; @@ -190,6 +191,9 @@ def : WriteRes<WriteFMaskedLoadY, [AtomPort0]>; def : WriteRes<WriteFStore, [AtomPort0]>; def : WriteRes<WriteFStoreX, [AtomPort0]>; def : WriteRes<WriteFStoreY, [AtomPort0]>; +def : WriteRes<WriteFStoreNT, [AtomPort0]>; +def : WriteRes<WriteFStoreNTX, [AtomPort0]>; +def : WriteRes<WriteFStoreNTY, [AtomPort0]>; def : WriteRes<WriteFMaskedStore, [AtomPort0]>; def : WriteRes<WriteFMaskedStoreY, [AtomPort0]>; @@ -281,12 +285,16 @@ def : WriteRes<WriteCvtF2FSt, [AtomPort0]>; // NOTE: Doesn't exist on Atom. def : WriteRes<WriteVecLoad, [AtomPort0]>; def : WriteRes<WriteVecLoadX, [AtomPort0]>; def : WriteRes<WriteVecLoadY, [AtomPort0]>; +def : WriteRes<WriteVecLoadNT, [AtomPort0]>; +def : WriteRes<WriteVecLoadNTY, [AtomPort0]>; def : WriteRes<WriteVecMaskedLoad, [AtomPort0]>; def : WriteRes<WriteVecMaskedLoadY, [AtomPort0]>; def : WriteRes<WriteVecStore, [AtomPort0]>; def : WriteRes<WriteVecStoreX, [AtomPort0]>; def : WriteRes<WriteVecStoreY, [AtomPort0]>; +def : WriteRes<WriteVecStoreNT, [AtomPort0]>; +def : WriteRes<WriteVecStoreNTY, [AtomPort0]>; def : WriteRes<WriteVecMaskedStore, [AtomPort0]>; def : WriteRes<WriteVecMaskedStoreY, [AtomPort0]>; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 96597456f42..aa0803ff2f0 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -226,9 +226,10 @@ def: InstRW<[JWriteSHLDm],(instrs SHLD16mri8, SHLD32mri8, SHLD64mri8, // Loads, stores, and moves, not folded with other operations. //////////////////////////////////////////////////////////////////////////////// -def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; } -def : WriteRes<WriteStore, [JSAGU]>; -def : WriteRes<WriteMove, [JALU01]>; +def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; } +def : WriteRes<WriteStore, [JSAGU]>; +def : WriteRes<WriteStoreNT, [JSAGU]>; +def : WriteRes<WriteMove, [JALU01]>; // Load/store MXCSR. // FIXME: These are copy and pasted from WriteLoad/Store. @@ -277,6 +278,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>; defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>; defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>; defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>; +defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>; +defm : X86WriteRes<WriteFStoreNTX, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>; +defm : X86WriteRes<WriteFStoreNTY, [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>; defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>; defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>; @@ -407,12 +411,16 @@ def : InstRW<[JWriteCVTSI2FLd], (instregex "(V)?CVTSI(64)?2S(D|S)rm")>; defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>; defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>; defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>; +defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>; +defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>; defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>; defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>; defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>; defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>; defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>; +defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>; +defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>; defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>; defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>; @@ -572,28 +580,6 @@ def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> { def : InstRW<[JWriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm, VCVTPS2DQYrm, VCVTTPS2DQYrm)>; -def JWriteVMOVNTDQSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> { - let Latency = 2; -} -def : InstRW<[JWriteVMOVNTDQSt], (instrs MMX_MOVNTQmr, MOVNTDQmr, VMOVNTDQmr)>; - -def JWriteVMOVNTDQYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> { - let Latency = 2; - let ResourceCycles = [2, 2, 2]; -} -def : InstRW<[JWriteVMOVNTDQYSt], (instrs VMOVNTDQYmr)>; - -def JWriteMOVNTSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> { - let Latency = 3; -} -def : InstRW<[JWriteMOVNTSt], (instrs MOVNTPDmr, MOVNTPSmr, MOVNTSD, MOVNTSS, VMOVNTPDmr, VMOVNTPSmr)>; - -def JWriteVMOVNTPYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> { - let Latency = 3; - let ResourceCycles = [2, 2, 2]; -} -def : InstRW<[JWriteVMOVNTPYSt], (instrs VMOVNTPDYmr, VMOVNTPSYmr)>; - def JWriteVCVTPDY: SchedWriteRes<[JFPU1, JSTC, JFPX]> { let Latency = 6; let ResourceCycles = [2, 2, 4]; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index ab39b241864..cec2768b792 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -79,10 +79,11 @@ multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW, // need an extra port cycle to recompute the address. def : WriteRes<WriteRMW, [SLM_MEC_RSV]>; -def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>; -def : WriteRes<WriteLoad, [SLM_MEC_RSV]> { let Latency = 3; } -def : WriteRes<WriteMove, [SLM_IEC_RSV01]>; -def : WriteRes<WriteZero, []>; +def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>; +def : WriteRes<WriteStoreNT, [SLM_IEC_RSV01, SLM_MEC_RSV]>; +def : WriteRes<WriteLoad, [SLM_MEC_RSV]> { let Latency = 3; } +def : WriteRes<WriteMove, [SLM_IEC_RSV01]>; +def : WriteRes<WriteZero, []>; // Load/store MXCSR. // FIXME: These are probably wrong. They are copy pasted from WriteStore/Load. @@ -141,6 +142,9 @@ def : WriteRes<WriteFMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; } def : WriteRes<WriteFStore, [SLM_MEC_RSV]>; def : WriteRes<WriteFStoreX, [SLM_MEC_RSV]>; def : WriteRes<WriteFStoreY, [SLM_MEC_RSV]>; +def : WriteRes<WriteFStoreNT, [SLM_MEC_RSV]>; +def : WriteRes<WriteFStoreNTX, [SLM_MEC_RSV]>; +def : WriteRes<WriteFStoreNTY, [SLM_MEC_RSV]>; def : WriteRes<WriteFMaskedStore, [SLM_MEC_RSV]>; def : WriteRes<WriteFMaskedStoreY, [SLM_MEC_RSV]>; def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>; @@ -214,11 +218,15 @@ def : WriteRes<WriteCvtF2FSt, [SLM_FPC_RSV01, SLM_MEC_RSV]>; def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; } def : WriteRes<WriteVecLoadX, [SLM_MEC_RSV]> { let Latency = 3; } def : WriteRes<WriteVecLoadY, [SLM_MEC_RSV]> { let Latency = 3; } +def : WriteRes<WriteVecLoadNT, [SLM_MEC_RSV]> { let Latency = 3; } +def : WriteRes<WriteVecLoadNTY, [SLM_MEC_RSV]> { let Latency = 3; } def : WriteRes<WriteVecMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; } def : WriteRes<WriteVecMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; } def : WriteRes<WriteVecStore, [SLM_MEC_RSV]>; def : WriteRes<WriteVecStoreX, [SLM_MEC_RSV]>; def : WriteRes<WriteVecStoreY, [SLM_MEC_RSV]>; +def : WriteRes<WriteVecStoreNT, [SLM_MEC_RSV]>; +def : WriteRes<WriteVecStoreNTY, [SLM_MEC_RSV]>; def : WriteRes<WriteVecMaskedStore, [SLM_MEC_RSV]>; def : WriteRes<WriteVecMaskedStoreY, [SLM_MEC_RSV]>; def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 0e68fabb017..ecf5ea62b44 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -143,9 +143,10 @@ multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW, // operation in codegen def : WriteRes<WriteRMW, [ZnAGU]>; -def : WriteRes<WriteStore, [ZnAGU]>; -def : WriteRes<WriteMove, [ZnALU]>; -def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; } +def : WriteRes<WriteStore, [ZnAGU]>; +def : WriteRes<WriteStoreNT, [ZnAGU]>; +def : WriteRes<WriteMove, [ZnALU]>; +def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; } def : WriteRes<WriteZero, []>; def : WriteRes<WriteLEA, [ZnALU]>; @@ -197,6 +198,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [ZnAGU,ZnFPU01], 8, [1,2], 2>; defm : X86WriteRes<WriteFStore, [ZnAGU], 1, [1,1], 1>; defm : X86WriteRes<WriteFStoreX, [ZnAGU], 1, [1,1], 1>; defm : X86WriteRes<WriteFStoreY, [ZnAGU], 1, [1,1], 1>; +defm : X86WriteRes<WriteFStoreNT, [ZnAGU,ZnFPU2], 8, [1,1], 1>; +defm : X86WriteRes<WriteFStoreNTX, [ZnAGU], 1, [1,1], 1>; +defm : X86WriteRes<WriteFStoreNTY, [ZnAGU], 1, [1,1], 1>; defm : X86WriteRes<WriteFMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>; defm : X86WriteRes<WriteFMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>; defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>; @@ -274,11 +278,15 @@ def : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>; defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>; defm : X86WriteRes<WriteVecLoadX, [ZnAGU], 8, [1], 1>; defm : X86WriteRes<WriteVecLoadY, [ZnAGU], 8, [1], 1>; +defm : X86WriteRes<WriteVecLoadNT, [ZnAGU], 8, [1], 1>; +defm : X86WriteRes<WriteVecLoadNTY, [ZnAGU], 8, [1], 1>; defm : X86WriteRes<WriteVecMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,2], 2>; defm : X86WriteRes<WriteVecMaskedLoadY, [ZnAGU,ZnFPU01], 9, [1,3], 2>; defm : X86WriteRes<WriteVecStore, [ZnAGU], 1, [1,1], 1>; defm : X86WriteRes<WriteVecStoreX, [ZnAGU], 1, [1,1], 1>; defm : X86WriteRes<WriteVecStoreY, [ZnAGU], 1, [1,1], 1>; +defm : X86WriteRes<WriteVecStoreNT, [ZnAGU], 1, [1,1], 1>; +defm : X86WriteRes<WriteVecStoreNTY, [ZnAGU], 1, [1,1], 1>; defm : X86WriteRes<WriteVecMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>; defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>; defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>; @@ -1344,12 +1352,6 @@ def ZnWriteINSERTQ: SchedWriteRes<[ZnFPU03,ZnFPU1]> { } def : InstRW<[ZnWriteINSERTQ], (instregex "INSERTQ")>; -// MOVNTSS/MOVNTSD -def ZnWriteMOVNT: SchedWriteRes<[ZnAGU,ZnFPU2]> { - let Latency = 8; -} -def : InstRW<[ZnWriteMOVNT], (instregex "MOVNTS(S|D)")>; - //-- SHA instructions --// // SHA256MSG2 def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>; |