summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td18
-rw-r--r--llvm/lib/Target/X86/X86SchedBroadwell.td8
-rw-r--r--llvm/lib/Target/X86/X86SchedHaswell.td8
-rw-r--r--llvm/lib/Target/X86/X86SchedSandyBridge.td8
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeClient.td8
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeServer.td8
-rw-r--r--llvm/lib/Target/X86/X86Schedule.td24
-rw-r--r--llvm/lib/Target/X86/X86ScheduleAtom.td6
-rw-r--r--llvm/lib/Target/X86/X86ScheduleBdVer2.td6
-rw-r--r--llvm/lib/Target/X86/X86ScheduleBtVer2.td19
-rw-r--r--llvm/lib/Target/X86/X86ScheduleSLM.td8
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver1.td8
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s22
-rw-r--r--llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s6
14 files changed, 114 insertions, 43 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 16c76ca7a6e..b3c88982a39 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -7061,27 +7061,29 @@ let Predicates = [HasAVX1Only] in {
//
multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
Intrinsic IntLd, Intrinsic IntLd256,
- Intrinsic IntSt, Intrinsic IntSt256> {
+ Intrinsic IntSt, Intrinsic IntSt256,
+ X86SchedWriteMaskMove schedX,
+ X86SchedWriteMaskMove schedY> {
def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
- VEX_4V, Sched<[WriteFMaskedLoad]>;
+ VEX_4V, Sched<[schedX.RM]>;
def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
- VEX_4V, VEX_L, Sched<[WriteFMaskedLoadY]>;
+ VEX_4V, VEX_L, Sched<[schedY.RM]>;
def mr : AVX8I<opc_mr, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>,
- VEX_4V, Sched<[WriteFMaskedStore]>;
+ VEX_4V, Sched<[schedX.MR]>;
def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src1, VR256:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>,
- VEX_4V, VEX_L, Sched<[WriteFMaskedStoreY]>;
+ VEX_4V, VEX_L, Sched<[schedY.MR]>;
}
let ExeDomain = SSEPackedSingle in
@@ -7089,13 +7091,15 @@ defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
int_x86_avx_maskload_ps,
int_x86_avx_maskload_ps_256,
int_x86_avx_maskstore_ps,
- int_x86_avx_maskstore_ps_256>;
+ int_x86_avx_maskstore_ps_256,
+ WriteFMaskMove32, WriteFMaskMove32Y>;
let ExeDomain = SSEPackedDouble in
defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
int_x86_avx_maskload_pd,
int_x86_avx_maskload_pd_256,
int_x86_avx_maskstore_pd,
- int_x86_avx_maskstore_pd_256>;
+ int_x86_avx_maskstore_pd_256,
+ WriteFMaskMove64, WriteFMaskMove64Y>;
//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 7574e4b8f89..9b1fcaa8a13 100644
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -232,8 +232,12 @@ defm : X86WriteRes<WriteFStoreY, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [BWPort237,BWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteFMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>;
+
defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [BWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [BWPort5], 1, [1], 1>;
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index 284d1567c5c..06f417501b2 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -231,8 +231,12 @@ defm : X86WriteRes<WriteFStoreY, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [HWPort237,HWPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteFMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>;
+
defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [HWPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [HWPort5], 1, [1], 1>;
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index d40bdf728a4..26d4d8fa354 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -208,8 +208,12 @@ defm : X86WriteRes<WriteFStoreY, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [SBPort23,SBPort4], 1, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>;
-defm : X86WriteRes<WriteFMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
-defm : X86WriteRes<WriteFMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteFMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>;
+
defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SBPort5], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SBPort5], 1, [1], 1>;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 8f3e4ae62d5..9a511ecc007 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -226,8 +226,12 @@ defm : X86WriteRes<WriteFStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+
defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SKLPort015], 1, [1], 1>;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 58caf1dacfc..a8c65435ab9 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -226,8 +226,12 @@ defm : X86WriteRes<WriteFStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+
defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [SKXPort015], 1, [1], 1>;
diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td
index 55ca85ec1e3..95f710061ae 100644
--- a/llvm/lib/Target/X86/X86Schedule.td
+++ b/llvm/lib/Target/X86/X86Schedule.td
@@ -102,6 +102,12 @@ class X86SchedWriteMoveLS<SchedWrite MoveRR,
SchedWrite MR = StoreMR;
}
+// Multiclass that wraps masked load/store writes for a vector width.
+class X86SchedWriteMaskMove<SchedWrite LoadRM, SchedWrite StoreMR> {
+ SchedWrite RM = LoadRM;
+ SchedWrite MR = StoreMR;
+}
+
// Multiclass that wraps X86SchedWriteMoveLS for each vector width.
class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl,
X86SchedWriteMoveLS s128,
@@ -218,8 +224,12 @@ def WriteFStoreY : SchedWrite;
def WriteFStoreNT : SchedWrite;
def WriteFStoreNTX : SchedWrite;
def WriteFStoreNTY : SchedWrite;
-def WriteFMaskedStore : SchedWrite;
-def WriteFMaskedStoreY : SchedWrite;
+
+def WriteFMaskedStore32 : SchedWrite;
+def WriteFMaskedStore64 : SchedWrite;
+def WriteFMaskedStore32Y : SchedWrite;
+def WriteFMaskedStore64Y : SchedWrite;
+
def WriteFMove : SchedWrite;
def WriteFMoveX : SchedWrite;
def WriteFMoveY : SchedWrite;
@@ -530,6 +540,16 @@ def SchedWriteVecMoveLSNT
: X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX,
WriteVecMoveLSNTY, WriteVecMoveLSNTY>;
+// Conditional SIMD Packed Loads and Stores wrappers.
+def WriteFMaskMove32
+ : X86SchedWriteMaskMove<WriteFMaskedLoad, WriteFMaskedStore32>;
+def WriteFMaskMove64
+ : X86SchedWriteMaskMove<WriteFMaskedLoad, WriteFMaskedStore64>;
+def WriteFMaskMove32Y
+ : X86SchedWriteMaskMove<WriteFMaskedLoadY, WriteFMaskedStore32Y>;
+def WriteFMaskMove64Y
+ : X86SchedWriteMaskMove<WriteFMaskedLoadY, WriteFMaskedStore64Y>;
+
// Vector width wrappers.
def SchedWriteFAdd
: X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddZ>;
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index b0334655de7..78acb1065ec 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -216,8 +216,10 @@ defm : X86WriteResUnsupported<WriteFStoreY>;
def : WriteRes<WriteFStoreNT, [AtomPort0]>;
def : WriteRes<WriteFStoreNTX, [AtomPort0]>;
defm : X86WriteResUnsupported<WriteFStoreNTY>;
-defm : X86WriteResUnsupported<WriteFMaskedStore>;
-defm : X86WriteResUnsupported<WriteFMaskedStoreY>;
+defm : X86WriteResUnsupported<WriteFMaskedStore32>;
+defm : X86WriteResUnsupported<WriteFMaskedStore32Y>;
+defm : X86WriteResUnsupported<WriteFMaskedStore64>;
+defm : X86WriteResUnsupported<WriteFMaskedStore64Y>;
def : WriteRes<WriteFMove, [AtomPort01]>;
def : WriteRes<WriteFMoveX, [AtomPort01]>;
diff --git a/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/llvm/lib/Target/X86/X86ScheduleBdVer2.td
index 8cc01c3acec..d7aea3cf4e9 100644
--- a/llvm/lib/Target/X86/X86ScheduleBdVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBdVer2.td
@@ -726,8 +726,10 @@ defm : PdWriteRes<WriteFStoreNT, [PdStore, PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTX, [PdStore, PdFPU1, PdFPSTO], 3>;
defm : PdWriteRes<WriteFStoreNTY, [PdStore, PdFPU1, PdFPSTO], 3, [2, 2, 2], 4>;
-defm : PdWriteRes<WriteFMaskedStore, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
-defm : PdWriteRes<WriteFMaskedStoreY, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
+defm : PdWriteRes<WriteFMaskedStore32, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
+defm : PdWriteRes<WriteFMaskedStore64, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>;
+defm : PdWriteRes<WriteFMaskedStore32Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
+defm : PdWriteRes<WriteFMaskedStore64Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>;
defm : PdWriteRes<WriteFMove, [PdFPU01, PdFPFMA]>;
defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA], 1, [1, 2]>;
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 32549bc06e0..3addf048dba 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -512,8 +512,11 @@ defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>;
-defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>;
-defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01], 16, [1,1, 5, 5,4,4,4], 19>;
+defm : X86WriteRes<WriteFMaskedStore64, [JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01], 13, [1,1, 2, 2,2,2,2], 10>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01], 22, [1,1,10,10,8,8,8], 36>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01], 16, [1,1, 4, 4,4,4,4], 18>;
defm : X86WriteRes<WriteFMove, [JFPU01, JFPX], 1, [1, 1], 1>;
defm : X86WriteRes<WriteFMoveX, [JFPU01, JFPX], 1, [1, 1], 1>;
@@ -819,6 +822,18 @@ def JWriteJVZEROUPPER: SchedWriteRes<[]> {
def : InstRW<[JWriteJVZEROUPPER], (instrs VZEROUPPER)>;
///////////////////////////////////////////////////////////////////////////////
+// SSE2/AVX Store Selected Bytes of Double Quadword - (V)MASKMOVDQ
+///////////////////////////////////////////////////////////////////////////////
+
+def JWriteMASKMOVDQU: SchedWriteRes<[JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01]> {
+ let Latency = 34;
+ let ResourceCycles = [1, 1, 2, 2, 2, 16, 42];
+ let NumMicroOps = 63;
+}
+def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64,
+ VMASKMOVDQU, VMASKMOVDQU64)>;
+
+///////////////////////////////////////////////////////////////////////////////
// SchedWriteVariant definitions.
///////////////////////////////////////////////////////////////////////////////
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index 34c251a5c5b..8e3ce721f1a 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -186,8 +186,12 @@ def : WriteRes<WriteFStoreY, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNT, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNTX, [SLM_MEC_RSV]>;
def : WriteRes<WriteFStoreNTY, [SLM_MEC_RSV]>;
-def : WriteRes<WriteFMaskedStore, [SLM_MEC_RSV]>;
-def : WriteRes<WriteFMaskedStoreY, [SLM_MEC_RSV]>;
+
+def : WriteRes<WriteFMaskedStore32, [SLM_MEC_RSV]>;
+def : WriteRes<WriteFMaskedStore32Y, [SLM_MEC_RSV]>;
+def : WriteRes<WriteFMaskedStore64, [SLM_MEC_RSV]>;
+def : WriteRes<WriteFMaskedStore64Y, [SLM_MEC_RSV]>;
+
def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>;
def : WriteRes<WriteFMoveX, [SLM_FPC_RSV01]>;
def : WriteRes<WriteFMoveY, [SLM_FPC_RSV01]>;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 65f6d89df61..06201f4a3a8 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -268,8 +268,12 @@ defm : X86WriteRes<WriteFStoreY, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreNT, [ZnAGU,ZnFPU2], 8, [1,1], 1>;
defm : X86WriteRes<WriteFStoreNTX, [ZnAGU], 1, [1], 1>;
defm : X86WriteRes<WriteFStoreNTY, [ZnAGU], 1, [1], 1>;
-defm : X86WriteRes<WriteFMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
-defm : X86WriteRes<WriteFMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
+defm : X86WriteRes<WriteFMaskedStore64, [ZnAGU,ZnFPU01], 4, [1,1], 1>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>;
+
defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [ZnFPU], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveY, [ZnFPU], 1, [1], 1>;
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
index 5b9c1dd66e9..1378a115b88 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
@@ -1219,15 +1219,15 @@ vzeroupper
# CHECK-NEXT: 1 5 1.00 * vlddqu (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 * vlddqu (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 * U vldmxcsr (%rax)
-# CHECK-NEXT: 1 1 1.00 * * U vmaskmovdqu %xmm0, %xmm1
+# CHECK-NEXT: 63 34 21.00 * * U vmaskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 1 6 1.00 * vmaskmovpd (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 6 2.00 * vmaskmovpd (%rax), %ymm0, %ymm2
-# CHECK-NEXT: 1 6 2.00 * * vmaskmovpd %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: 2 6 2.00 * * vmaskmovpd %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: 10 13 2.00 * * vmaskmovpd %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: 18 16 4.00 * * vmaskmovpd %ymm0, %ymm1, (%rax)
# CHECK-NEXT: 1 6 1.00 * vmaskmovps (%rax), %xmm0, %xmm2
# CHECK-NEXT: 2 6 2.00 * vmaskmovps (%rax), %ymm0, %ymm2
-# CHECK-NEXT: 1 6 2.00 * * vmaskmovps %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: 2 6 2.00 * * vmaskmovps %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: 19 16 5.00 * * vmaskmovps %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: 36 22 10.00 * * vmaskmovps %ymm0, %ymm1, (%rax)
# CHECK-NEXT: 1 2 1.00 vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vmaxpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 2 2.00 vmaxpd %ymm0, %ymm1, %ymm2
@@ -1740,7 +1740,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: 56.00 - - 365.00 915.00 447.50 461.50 394.00 - 51.00 132.00 135.50 159.50 38.00
+# CHECK-NEXT: 86.00 30.00 - 362.00 907.00 449.50 480.50 414.00 - 78.00 154.00 135.50 159.50 38.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@@ -1933,15 +1933,15 @@ vzeroupper
# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vlddqu (%rax), %xmm2
# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vlddqu (%rax), %ymm2
# CHECK-NEXT: - - - - - - - 1.00 - - - - - - vldmxcsr (%rax)
-# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmaskmovdqu %xmm0, %xmm1
+# CHECK-NEXT: 21.00 21.00 - 1.00 - 1.00 2.00 2.00 - 16.00 2.00 - - - vmaskmovdqu %xmm0, %xmm1
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 1.00 - - - - - - vmaskmovpd (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 2.00 - - - - - - vmaskmovpd (%rax), %ymm0, %ymm2
-# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - 1.00 - - - - vmaskmovpd %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: - - - 2.00 2.00 1.00 1.00 - - 2.00 - - - - vmaskmovpd %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: 1.00 1.00 - 1.00 - 1.00 2.00 2.00 - 2.00 2.00 - - - vmaskmovpd %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: 2.00 2.00 - 1.00 - 1.00 4.00 4.00 - 4.00 4.00 - - - vmaskmovpd %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 1.00 - - - - - - vmaskmovps (%rax), %xmm0, %xmm2
# CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 2.00 - - - - - - vmaskmovps (%rax), %ymm0, %ymm2
-# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - 1.00 - - - - vmaskmovps %xmm0, %xmm1, (%rax)
-# CHECK-NEXT: - - - 2.00 2.00 1.00 1.00 - - 2.00 - - - - vmaskmovps %ymm0, %ymm1, (%rax)
+# CHECK-NEXT: 2.00 2.00 - 1.00 - 1.00 5.00 4.00 - 4.00 5.00 - - - vmaskmovps %xmm0, %xmm1, (%rax)
+# CHECK-NEXT: 4.00 4.00 - 1.00 - 1.00 10.00 8.00 - 8.00 10.00 - - - vmaskmovps %ymm0, %ymm1, (%rax)
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmaxpd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vmaxpd %ymm0, %ymm1, %ymm2
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s
index c9d7f32d4e0..924066ae87b 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s
@@ -465,7 +465,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 19 19.00 divsd %xmm0, %xmm2
# CHECK-NEXT: 1 24 19.00 * divsd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * * U lfence
-# CHECK-NEXT: 1 1 1.00 * * U maskmovdqu %xmm0, %xmm1
+# CHECK-NEXT: 63 34 21.00 * * U maskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 1 2 1.00 maxpd %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * maxpd (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 maxsd %xmm0, %xmm2
@@ -693,7 +693,7 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: 17.00 - - 49.00 204.00 128.50 141.50 118.00 - 16.00 54.00 67.50 67.50 12.00
+# CHECK-NEXT: 38.00 21.00 - 50.00 204.00 129.50 142.50 120.00 - 31.00 55.00 67.50 67.50 12.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@@ -755,7 +755,7 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - divsd %xmm0, %xmm2
# CHECK-NEXT: - - - - 19.00 - 1.00 1.00 - - - - - - divsd (%rax), %xmm2
# CHECK-NEXT: - - - - - - - - - 1.00 - - - - lfence
-# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - maskmovdqu %xmm0, %xmm1
+# CHECK-NEXT: 21.00 21.00 - 1.00 - 1.00 2.00 2.00 - 16.00 2.00 - - - maskmovdqu %xmm0, %xmm1
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - maxpd %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - maxpd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - maxsd %xmm0, %xmm2
OpenPOWER on IntegriCloud