summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2018-05-14 18:37:19 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2018-05-14 18:37:19 +0000
commit215ce4a1cadd10d2533efe601c0e1e665df6bba4 (patch)
tree1f1c9e731b88b3075cceb9899fc005dbcb963221
parent62f7af712c0043e8bdb83e6c91bde43a11aab210 (diff)
downloadbcm5719-llvm-215ce4a1cadd10d2533efe601c0e1e665df6bba4.tar.gz
bcm5719-llvm-215ce4a1cadd10d2533efe601c0e1e665df6bba4.zip
[X86] Add NT load/store scheduler classes
llvm-svn: 332274
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td6
-rw-r--r--llvm/lib/Target/X86/X86InstrMMX.td2
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td22
-rwxr-xr-xllvm/lib/Target/X86/X86SchedBroadwell.td16
-rw-r--r--llvm/lib/Target/X86/X86SchedHaswell.td18
-rw-r--r--llvm/lib/Target/X86/X86SchedSandyBridge.td16
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeClient.td16
-rwxr-xr-xllvm/lib/Target/X86/X86SchedSkylakeServer.td16
-rw-r--r--llvm/lib/Target/X86/X86Schedule.td34
-rw-r--r--llvm/lib/Target/X86/X86ScheduleAtom.td14
-rw-r--r--llvm/lib/Target/X86/X86ScheduleBtVer2.td36
-rw-r--r--llvm/lib/Target/X86/X86ScheduleSLM.td16
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver1.td20
13 files changed, 148 insertions, 84 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index cb995af87f1..d8991fa0e9f 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4373,11 +4373,11 @@ multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
}
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
- SchedWriteVecMoveLS>, PD;
+ SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
- SchedWriteFMoveLS>, PD, VEX_W;
+ SchedWriteFMoveLSNT>, PD, VEX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
- SchedWriteFMoveLS>, PS;
+ SchedWriteFMoveLSNT>, PS;
let Predicates = [HasAVX512], AddedComplexity = 400 in {
def : Pat<(alignednontemporalstore (v16i32 VR512:$src), addr:$dst),
diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td
index 57a5bee2b26..c716bbc1141 100644
--- a/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/llvm/lib/Target/X86/X86InstrMMX.td
@@ -272,7 +272,7 @@ let Predicates = [HasSSE1] in
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movntq\t{$src, $dst|$dst, $src}",
[(int_x86_mmx_movnt_dq addr:$dst, VR64:$src)]>,
- Sched<[SchedWriteVecMoveLS.MMX.MR]>;
+ Sched<[SchedWriteVecMoveLSNT.MMX.MR]>;
let Predicates = [HasMMX] in {
let AddedComplexity = 15 in
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 0731bdb9d97..cc615e4438d 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3017,7 +3017,7 @@ defm : scalar_unary_math_patterns<int_x86_sse2_sqrt_sd, "SQRTSD", X86Movsd,
let AddedComplexity = 400 in { // Prefer non-temporal versions
let Predicates = [HasAVX, NoVLX] in {
-let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
+let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
@@ -3030,7 +3030,7 @@ def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
addr:$dst)]>, VEX, VEX_WIG;
} // SchedRW
-let SchedRW = [SchedWriteFMoveLS.YMM.MR] in {
+let SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in {
def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntps\t{$src, $dst|$dst, $src}",
@@ -3049,17 +3049,17 @@ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2i64 VR128:$src),
addr:$dst)]>, VEX, VEX_WIG,
- Sched<[SchedWriteVecMoveLS.XMM.MR]>;
+ Sched<[SchedWriteVecMoveLSNT.XMM.MR]>;
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4i64 VR256:$src),
addr:$dst)]>, VEX, VEX_L, VEX_WIG,
- Sched<[SchedWriteVecMoveLS.YMM.MR]>;
+ Sched<[SchedWriteVecMoveLSNT.YMM.MR]>;
} // ExeDomain
} // Predicates
-let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
+let SchedRW = [SchedWriteFMoveLSNT.XMM.MR] in {
def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
@@ -3068,12 +3068,12 @@ def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
} // SchedRW
-let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in
+let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLSNT.XMM.MR] in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)]>;
-let SchedRW = [WriteStore] in {
+let SchedRW = [WriteStoreNT] in {
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti{l}\t{$src, $dst|$dst, $src}",
@@ -6409,14 +6409,14 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
let Predicates = [HasAVX, NoVLX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
+ Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, VEX_WIG;
let Predicates = [HasAVX2, NoVLX] in
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG;
+ Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, VEX_WIG;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLS.XMM.RM]>;
+ Sched<[SchedWriteVecMoveLSNT.XMM.RM]>;
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v8f32 (alignednontemporalload addr:$src)),
@@ -6935,7 +6935,7 @@ def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
// Non-temporal (unaligned) scalar stores.
let AddedComplexity = 400 in { // Prefer non-temporal versions
-let hasSideEffects = 0, mayStore = 1, SchedRW = [WriteStore] in {
+let hasSideEffects = 0, mayStore = 1, SchedRW = [SchedWriteFMoveLSNT.Scl.MR] in {
def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
"movntss\t{$src, $dst|$dst, $src}", []>, XS;
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 7efc3a28481..3af49f62428 100755
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -147,9 +147,10 @@ defm : BWWriteResPair<WriteBEXTR, [BWPort06,BWPort15], 2, [1,1], 2>;
defm : BWWriteResPair<WriteBZHI, [BWPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
-def : WriteRes<WriteLoad, [BWPort23]> { let Latency = 5; }
-def : WriteRes<WriteStore, [BWPort237, BWPort4]>;
-def : WriteRes<WriteMove, [BWPort0156]>;
+defm : X86WriteRes<WriteLoad, [BWPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteStore, [BWPort237, BWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [BWPort237, BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteMove, [BWPort0156], 1, [1,1], 1>;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
@@ -171,6 +172,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>;
defm : X86WriteRes<WriteFStore, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTX, [BWPort237,BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
@@ -266,11 +270,15 @@ def : WriteRes<WriteCvtF2FSt, [BWPort1,BWPort4,BWPort237]> {
defm : X86WriteRes<WriteVecLoad, [BWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [BWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [BWPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [BWPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [BWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [BWPort23,BWPort5], 7, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecStore, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>;
@@ -578,8 +586,6 @@ def BWWriteResGroup10 : SchedWriteRes<[BWPort4,BWPort237]> {
}
def: InstRW<[BWWriteResGroup10], (instregex "FBSTPm",
"MMX_MOVD64mr",
- "MOVNTI_64mr",
- "MOVNTImr",
"ST_FP(32|64|80)m",
"(V?)MOV(H|L)(PD|PS)mr",
"(V?)MOVPDI2DImr",
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 0a64f2a37fe..7eba2469eb9 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -107,10 +107,11 @@ def : WriteRes<WriteRMW, [HWPort237,HWPort4]>;
// Store_addr on 237.
// Store_data on 4.
-def : WriteRes<WriteStore, [HWPort237, HWPort4]>;
-def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 5; }
-def : WriteRes<WriteMove, [HWPort0156]>;
-def : WriteRes<WriteZero, []>;
+defm : X86WriteRes<WriteStore, [HWPort237, HWPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [HWPort237, HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteLoad, [HWPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteMove, [HWPort0156], 1, [1], 1>;
+def : WriteRes<WriteZero, []>;
defm : HWWriteResPair<WriteALU, [HWPort0156], 1>;
defm : HWWriteResPair<WriteIMul, [HWPort1], 3>;
@@ -161,6 +162,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>;
defm : X86WriteRes<WriteFStore, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTX, [HWPort237,HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>;
@@ -257,11 +261,15 @@ def : WriteRes<WriteCvtF2FSt, [HWPort1,HWPort4,HWPort5,HWPort237]> {
defm : X86WriteRes<WriteVecLoad, [HWPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [HWPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [HWPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [HWPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [HWPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [HWPort23,HWPort5], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>;
defm : X86WriteRes<WriteVecStore, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>;
@@ -754,8 +762,6 @@ def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> {
}
def: InstRW<[HWWriteResGroup1], (instregex "FBSTPm",
"MMX_MOVD64mr",
- "MOVNTI_64mr",
- "MOVNTImr",
"ST_FP(32|64|80)m",
"(V?)MOV(H|L)(PD|PS)mr",
"(V?)MOVPDI2DImr",
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index be380ffa2ff..953d0c266fe 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -96,10 +96,11 @@ multiclass SBWriteResPair<X86FoldableSchedWrite SchedRW,
// 2/3 cycle to recompute the address.
def : WriteRes<WriteRMW, [SBPort23,SBPort4]>;
-def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
-def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
-def : WriteRes<WriteMove, [SBPort015]>;
-def : WriteRes<WriteZero, []>;
+def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
+def : WriteRes<WriteStoreNT, [SBPort23, SBPort4]>;
+def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
+def : WriteRes<WriteMove, [SBPort015]>;
+def : WriteRes<WriteZero, []>;
defm : SBWriteResPair<WriteALU, [SBPort015], 1>;
defm : SBWriteResPair<WriteIMul, [SBPort1], 3>;
@@ -153,6 +154,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
defm : X86WriteRes<WriteFStore, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNTX, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteFMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>;
@@ -237,11 +241,15 @@ def : WriteRes<WriteCvtF2FSt, [SBPort1, SBPort23, SBPort4]> { let Latency = 4;
defm : X86WriteRes<WriteVecLoad, [SBPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SBPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SBPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [SBPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [SBPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>;
defm : X86WriteRes<WriteVecStore, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index e5019938dbd..a986d883c0a 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -145,9 +145,10 @@ defm : SKLWriteResPair<WriteBEXTR, [SKLPort06,SKLPort15], 2, [1,1], 2>;
defm : SKLWriteResPair<WriteBZHI, [SKLPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
-def : WriteRes<WriteLoad, [SKLPort23]> { let Latency = 5; }
-def : WriteRes<WriteStore, [SKLPort237, SKLPort4]>;
-def : WriteRes<WriteMove, [SKLPort0156]>;
+defm : X86WriteRes<WriteLoad, [SKLPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteStore, [SKLPort237, SKLPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [SKLPort237, SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteMove, [SKLPort0156], 1, [1], 1>;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
@@ -166,6 +167,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteFStore, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>;
@@ -258,11 +262,15 @@ def : WriteRes<WriteCvtF2FSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01]> {
defm : X86WriteRes<WriteVecLoad, [SKLPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SKLPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SKLPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [SKLPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [SKLPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKLPort23,SKLPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecStore, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKLPort015], 1, [1], 1>;
@@ -580,8 +588,6 @@ def SKLWriteResGroup11 : SchedWriteRes<[SKLPort4,SKLPort237]> {
}
def: InstRW<[SKLWriteResGroup11], (instregex "FBSTPm",
"MMX_MOVD64mr",
- "MOVNTI_64mr",
- "MOVNTImr",
"ST_FP(32|64|80)m",
"(V?)MOV(H|L)(PD|PS)mr",
"(V?)MOVPDI2DImr",
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 4bd95ea494d..01bbb035d46 100755
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -145,9 +145,10 @@ defm : SKXWriteResPair<WriteBEXTR, [SKXPort06,SKXPort15], 2, [1,1], 2>;
defm : SKXWriteResPair<WriteBZHI, [SKXPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
-def : WriteRes<WriteLoad, [SKXPort23]> { let Latency = 5; }
-def : WriteRes<WriteStore, [SKXPort237, SKXPort4]>;
-def : WriteRes<WriteMove, [SKXPort0156]>;
+defm : X86WriteRes<WriteLoad, [SKXPort23], 5, [1], 1>;
+defm : X86WriteRes<WriteStore, [SKXPort237, SKXPort4], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [SKXPort237, SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteMove, [SKXPort0156], 1, [1], 1>;
// Idioms that clear a register, like xorps %xmm0, %xmm0.
// These can often bypass execution ports completely.
@@ -166,6 +167,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteFStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
@@ -258,11 +262,15 @@ def : WriteRes<WriteCvtF2FSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015]> {
defm : X86WriteRes<WriteVecLoad, [SKXPort23], 5, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [SKXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [SKXPort23], 7, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [SKXPort23], 6, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [SKXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>;
defm : X86WriteRes<WriteVecStore, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKXPort015], 1, [1], 1>;
@@ -605,8 +613,6 @@ def SKXWriteResGroup11 : SchedWriteRes<[SKXPort4,SKXPort237]> {
def: InstRW<[SKXWriteResGroup11], (instregex "FBSTPm",
"KMOV(B|D|Q|W)mk",
"MMX_MOVD64mr",
- "MOVNTI_64mr",
- "MOVNTImr",
"ST_FP(32|64|80)m",
"VMOV(H|L)(PD|PS)Z128mr(b?)",
"(V?)MOV(H|L)(PD|PS)mr",
diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td
index a0a9a911788..6bbf456e588 100644
--- a/llvm/lib/Target/X86/X86Schedule.td
+++ b/llvm/lib/Target/X86/X86Schedule.td
@@ -90,9 +90,10 @@ class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl,
}
// Loads, stores, and moves, not folded with other operations.
-def WriteLoad : SchedWrite;
-def WriteStore : SchedWrite;
-def WriteMove : SchedWrite;
+def WriteLoad : SchedWrite;
+def WriteStore : SchedWrite;
+def WriteStoreNT : SchedWrite;
+def WriteMove : SchedWrite;
// Arithmetic.
defm WriteALU : X86SchedWritePair; // Simple integer ALU op.
@@ -145,6 +146,9 @@ def WriteFMaskedLoadY : SchedWrite;
def WriteFStore : SchedWrite;
def WriteFStoreX : SchedWrite;
def WriteFStoreY : SchedWrite;
+def WriteFStoreNT : SchedWrite;
+def WriteFStoreNTX : SchedWrite;
+def WriteFStoreNTY : SchedWrite;
def WriteFMaskedStore : SchedWrite;
def WriteFMaskedStoreY : SchedWrite;
def WriteFMove : SchedWrite;
@@ -229,11 +233,15 @@ defm WritePHAddY : X86SchedWritePair; // YMM/ZMM.
def WriteVecLoad : SchedWrite;
def WriteVecLoadX : SchedWrite;
def WriteVecLoadY : SchedWrite;
+def WriteVecLoadNT : SchedWrite;
+def WriteVecLoadNTY : SchedWrite;
def WriteVecMaskedLoad : SchedWrite;
def WriteVecMaskedLoadY : SchedWrite;
def WriteVecStore : SchedWrite;
def WriteVecStoreX : SchedWrite;
def WriteVecStoreY : SchedWrite;
+def WriteVecStoreNT : SchedWrite;
+def WriteVecStoreNTY : SchedWrite;
def WriteVecMaskedStore : SchedWrite;
def WriteVecMaskedStoreY : SchedWrite;
def WriteVecMove : SchedWrite;
@@ -352,6 +360,16 @@ def SchedWriteFMoveLS
: X86SchedWriteMoveLSWidths<WriteFMoveLS, WriteFMoveLSX,
WriteFMoveLSY, WriteFMoveLSY>;
+def WriteFMoveLSNT
+ : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNT>;
+def WriteFMoveLSNTX
+ : X86SchedWriteMoveLS<WriteFMove, WriteFLoad, WriteFStoreNTX>;
+def WriteFMoveLSNTY
+ : X86SchedWriteMoveLS<WriteFMoveY, WriteFLoadY, WriteFStoreNTY>;
+def SchedWriteFMoveLSNT
+ : X86SchedWriteMoveLSWidths<WriteFMoveLSNT, WriteFMoveLSNTX,
+ WriteFMoveLSNTY, WriteFMoveLSNTY>;
+
def WriteVecMoveLS
: X86SchedWriteMoveLS<WriteVecMove, WriteVecLoad, WriteVecStore>;
def WriteVecMoveLSX
@@ -362,6 +380,16 @@ def SchedWriteVecMoveLS
: X86SchedWriteMoveLSWidths<WriteVecMoveLS, WriteVecMoveLSX,
WriteVecMoveLSY, WriteVecMoveLSY>;
+def WriteVecMoveLSNT
+ : X86SchedWriteMoveLS<WriteVecMove, WriteVecLoadNT, WriteVecStoreNT>;
+def WriteVecMoveLSNTX
+ : X86SchedWriteMoveLS<WriteVecMoveX, WriteVecLoadNT, WriteVecStoreNT>;
+def WriteVecMoveLSNTY
+ : X86SchedWriteMoveLS<WriteVecMoveY, WriteVecLoadNTY, WriteVecStoreNTY>;
+def SchedWriteVecMoveLSNT
+ : X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX,
+ WriteVecMoveLSNTY, WriteVecMoveLSNTY>;
+
// Vector width wrappers.
def SchedWriteFAdd
: X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddY>;
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index b69a6281fd4..32e84a169ea 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -144,9 +144,10 @@ defm : AtomWriteResPair<WriteShift, [AtomPort0], [AtomPort0]>;
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WriteLoad, [AtomPort0]>;
-def : WriteRes<WriteStore, [AtomPort0]>;
-def : WriteRes<WriteMove, [AtomPort01]>;
+def : WriteRes<WriteLoad, [AtomPort0]>;
+def : WriteRes<WriteStore, [AtomPort0]>;
+def : WriteRes<WriteStoreNT, [AtomPort0]>;
+def : WriteRes<WriteMove, [AtomPort01]>;
// Treat misc copies as a move.
def : InstRW<[WriteMove], (instrs COPY)>;
@@ -190,6 +191,9 @@ def : WriteRes<WriteFMaskedLoadY, [AtomPort0]>;
def : WriteRes<WriteFStore, [AtomPort0]>;
def : WriteRes<WriteFStoreX, [AtomPort0]>;
def : WriteRes<WriteFStoreY, [AtomPort0]>;
+def : WriteRes<WriteFStoreNT, [AtomPort0]>;
+def : WriteRes<WriteFStoreNTX, [AtomPort0]>;
+def : WriteRes<WriteFStoreNTY, [AtomPort0]>;
def : WriteRes<WriteFMaskedStore, [AtomPort0]>;
def : WriteRes<WriteFMaskedStoreY, [AtomPort0]>;
@@ -281,12 +285,16 @@ def : WriteRes<WriteCvtF2FSt, [AtomPort0]>; // NOTE: Doesn't exist on Atom.
def : WriteRes<WriteVecLoad, [AtomPort0]>;
def : WriteRes<WriteVecLoadX, [AtomPort0]>;
def : WriteRes<WriteVecLoadY, [AtomPort0]>;
+def : WriteRes<WriteVecLoadNT, [AtomPort0]>;
+def : WriteRes<WriteVecLoadNTY, [AtomPort0]>;
def : WriteRes<WriteVecMaskedLoad, [AtomPort0]>;
def : WriteRes<WriteVecMaskedLoadY, [AtomPort0]>;
def : WriteRes<WriteVecStore, [AtomPort0]>;
def : WriteRes<WriteVecStoreX, [AtomPort0]>;
def : WriteRes<WriteVecStoreY, [AtomPort0]>;
+def : WriteRes<WriteVecStoreNT, [AtomPort0]>;
+def : WriteRes<WriteVecStoreNTY, [AtomPort0]>;
def : WriteRes<WriteVecMaskedStore, [AtomPort0]>;
def : WriteRes<WriteVecMaskedStoreY, [AtomPort0]>;
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 96597456f42..aa0803ff2f0 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -226,9 +226,10 @@ def: InstRW<[JWriteSHLDm],(instrs SHLD16mri8, SHLD32mri8, SHLD64mri8,
// Loads, stores, and moves, not folded with other operations.
////////////////////////////////////////////////////////////////////////////////
-def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
-def : WriteRes<WriteStore, [JSAGU]>;
-def : WriteRes<WriteMove, [JALU01]>;
+def : WriteRes<WriteLoad, [JLAGU]> { let Latency = 5; }
+def : WriteRes<WriteStore, [JSAGU]>;
+def : WriteRes<WriteStoreNT, [JSAGU]>;
+def : WriteRes<WriteMove, [JALU01]>;
// Load/store MXCSR.
// FIXME: These are copy and pasted from WriteLoad/Store.
@@ -277,6 +278,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteFStoreNTX, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteFStoreNTY, [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>;
defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
@@ -407,12 +411,16 @@ def : InstRW<[JWriteCVTSI2FLd], (instregex "(V)?CVTSI(64)?2S(D|S)rm")>;
defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadX, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecLoadY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>;
defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreX, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteVecStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecStoreNT, [JSAGU, JFPU1, JSTC], 2, [1, 1, 1], 1>;
+defm : X86WriteRes<WriteVecStoreNTY, [JSAGU, JFPU1, JSTC], 2, [2, 2, 2], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>;
@@ -572,28 +580,6 @@ def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
def : InstRW<[JWriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm,
VCVTPS2DQYrm, VCVTTPS2DQYrm)>;
-def JWriteVMOVNTDQSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
- let Latency = 2;
-}
-def : InstRW<[JWriteVMOVNTDQSt], (instrs MMX_MOVNTQmr, MOVNTDQmr, VMOVNTDQmr)>;
-
-def JWriteVMOVNTDQYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
- let Latency = 2;
- let ResourceCycles = [2, 2, 2];
-}
-def : InstRW<[JWriteVMOVNTDQYSt], (instrs VMOVNTDQYmr)>;
-
-def JWriteMOVNTSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
- let Latency = 3;
-}
-def : InstRW<[JWriteMOVNTSt], (instrs MOVNTPDmr, MOVNTPSmr, MOVNTSD, MOVNTSS, VMOVNTPDmr, VMOVNTPSmr)>;
-
-def JWriteVMOVNTPYSt: SchedWriteRes<[JFPU1, JSTC, JSAGU]> {
- let Latency = 3;
- let ResourceCycles = [2, 2, 2];
-}
-def : InstRW<[JWriteVMOVNTPYSt], (instrs VMOVNTPDYmr, VMOVNTPSYmr)>;
-
def JWriteVCVTPDY: SchedWriteRes<[JFPU1, JSTC, JFPX]> {
let Latency = 6;
let ResourceCycles = [2, 2, 4];
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index ab39b241864..cec2768b792 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -79,10 +79,11 @@ multiclass SLMWriteResPair<X86FoldableSchedWrite SchedRW,
// need an extra port cycle to recompute the address.
def : WriteRes<WriteRMW, [SLM_MEC_RSV]>;
-def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
-def : WriteRes<WriteLoad, [SLM_MEC_RSV]> { let Latency = 3; }
-def : WriteRes<WriteMove, [SLM_IEC_RSV01]>;
-def : WriteRes<WriteZero, []>;
+def : WriteRes<WriteStore, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
+def : WriteRes<WriteStoreNT, [SLM_IEC_RSV01, SLM_MEC_RSV]>;
+def : WriteRes<WriteLoad, [SLM_MEC_RSV]> { let Latency = 3; }
+def : WriteRes<WriteMove, [SLM_IEC_RSV01]>;
+def : WriteRes<WriteZero, []>;
// Load/store MXCSR.
// FIXME: These are probably wrong. They are copy pasted from WriteStore/Load.
@@ -141,6 +142,9 @@ def : WriteRes<WriteFMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteFStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreY, [SLM_MEC_RSV]>;
+def : WriteRes<WriteFStoreNT, [SLM_MEC_RSV]>;
+def : WriteRes<WriteFStoreNTX, [SLM_MEC_RSV]>;
+def : WriteRes<WriteFStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
@@ -214,11 +218,15 @@ def : WriteRes<WriteCvtF2FSt, [SLM_FPC_RSV01, SLM_MEC_RSV]>;
def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadX, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
+def : WriteRes<WriteVecLoadNT, [SLM_MEC_RSV]> { let Latency = 3; }
+def : WriteRes<WriteVecLoadNTY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; }
def : WriteRes<WriteVecStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreX, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecStoreY, [SLM_MEC_RSV]>;
+def : WriteRes<WriteVecStoreNT, [SLM_MEC_RSV]>;
+def : WriteRes<WriteVecStoreNTY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStore, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMaskedStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 0e68fabb017..ecf5ea62b44 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -143,9 +143,10 @@ multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
// operation in codegen
def : WriteRes<WriteRMW, [ZnAGU]>;
-def : WriteRes<WriteStore, [ZnAGU]>;
-def : WriteRes<WriteMove, [ZnALU]>;
-def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
+def : WriteRes<WriteStore, [ZnAGU]>;
+def : WriteRes<WriteStoreNT, [ZnAGU]>;
+def : WriteRes<WriteMove, [ZnALU]>;
+def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
def : WriteRes<WriteZero, []>;
def : WriteRes<WriteLEA, [ZnALU]>;
@@ -197,6 +198,9 @@ defm : X86WriteRes<WriteFMaskedLoadY, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
defm : X86WriteRes<WriteFStore, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreX, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreY, [ZnAGU], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNT, [ZnAGU,ZnFPU2], 8, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNTX, [ZnAGU], 1, [1,1], 1>;
+defm : X86WriteRes<WriteFStoreNTY, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteFMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>;
@@ -274,11 +278,15 @@ def : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>;
defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecLoadX, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecLoadY, [ZnAGU], 8, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNT, [ZnAGU], 8, [1], 1>;
+defm : X86WriteRes<WriteVecLoadNTY, [ZnAGU], 8, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,2], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [ZnAGU,ZnFPU01], 9, [1,3], 2>;
defm : X86WriteRes<WriteVecStore, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreX, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecStoreY, [ZnAGU], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStoreNT, [ZnAGU], 1, [1,1], 1>;
+defm : X86WriteRes<WriteVecStoreNTY, [ZnAGU], 1, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>;
@@ -1344,12 +1352,6 @@ def ZnWriteINSERTQ: SchedWriteRes<[ZnFPU03,ZnFPU1]> {
}
def : InstRW<[ZnWriteINSERTQ], (instregex "INSERTQ")>;
-// MOVNTSS/MOVNTSD
-def ZnWriteMOVNT: SchedWriteRes<[ZnAGU,ZnFPU2]> {
- let Latency = 8;
-}
-def : InstRW<[ZnWriteMOVNT], (instregex "MOVNTS(S|D)")>;
-
//-- SHA instructions --//
// SHA256MSG2
def : InstRW<[WriteMicrocoded], (instregex "SHA256MSG2(Y?)r(r|m)")>;
OpenPOWER on IntegriCloud