diff options
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 18 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedBroadwell.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 24 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleAtom.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBdVer2.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 19 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 8 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s | 22 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s | 6 |
14 files changed, 114 insertions, 43 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 16c76ca7a6e..b3c88982a39 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7061,27 +7061,29 @@ let Predicates = [HasAVX1Only] in { // multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr, Intrinsic IntLd, Intrinsic IntLd256, - Intrinsic IntSt, Intrinsic IntSt256> { + Intrinsic IntSt, Intrinsic IntSt256, + X86SchedWriteMaskMove schedX, + X86SchedWriteMaskMove schedY> { def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>, - VEX_4V, Sched<[WriteFMaskedLoad]>; + VEX_4V, Sched<[schedX.RM]>; def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, - VEX_4V, VEX_L, Sched<[WriteFMaskedLoadY]>; + VEX_4V, VEX_L, Sched<[schedY.RM]>; def mr : AVX8I<opc_mr, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, - VEX_4V, Sched<[WriteFMaskedStore]>; + VEX_4V, Sched<[schedX.MR]>; def Ymr : AVX8I<opc_mr, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src1, VR256:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, - VEX_4V, VEX_L, Sched<[WriteFMaskedStoreY]>; + VEX_4V, VEX_L, Sched<[schedY.MR]>; } let ExeDomain = SSEPackedSingle in @@ -7089,13 +7091,15 @@ defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps", int_x86_avx_maskload_ps, int_x86_avx_maskload_ps_256, int_x86_avx_maskstore_ps, - int_x86_avx_maskstore_ps_256>; + int_x86_avx_maskstore_ps_256, + WriteFMaskMove32, WriteFMaskMove32Y>; let ExeDomain = SSEPackedDouble in defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", int_x86_avx_maskload_pd, int_x86_avx_maskload_pd_256, int_x86_avx_maskstore_pd, - int_x86_avx_maskstore_pd_256>; + int_x86_avx_maskstore_pd_256, + WriteFMaskMove64, WriteFMaskMove64Y>; //===----------------------------------------------------------------------===// // VPERMIL - Permute Single and Double Floating-Point Values diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 7574e4b8f89..9b1fcaa8a13 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -232,8 +232,12 @@ defm : X86WriteRes<WriteFStoreY, [BWPort237,BWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreNT, [BWPort237,BWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreNTX, [BWPort237,BWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreNTY, [BWPort237,BWPort4], 1, [1,1], 2>; -defm : X86WriteRes<WriteFMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>; -defm : X86WriteRes<WriteFMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>; + +defm : X86WriteRes<WriteFMaskedStore32, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>; +defm : X86WriteRes<WriteFMaskedStore32Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>; +defm : X86WriteRes<WriteFMaskedStore64, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>; +defm : X86WriteRes<WriteFMaskedStore64Y, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>; + defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>; defm : X86WriteRes<WriteFMoveX, [BWPort5], 1, [1], 1>; defm : X86WriteRes<WriteFMoveY, [BWPort5], 1, [1], 1>; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 284d1567c5c..06f417501b2 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -231,8 +231,12 @@ defm : X86WriteRes<WriteFStoreY, [HWPort237,HWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreNT, [HWPort237,HWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreNTX, [HWPort237,HWPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreNTY, [HWPort237,HWPort4], 1, [1,1], 2>; -defm : X86WriteRes<WriteFMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>; -defm : X86WriteRes<WriteFMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>; + +defm : X86WriteRes<WriteFMaskedStore32, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>; +defm : X86WriteRes<WriteFMaskedStore32Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>; +defm : X86WriteRes<WriteFMaskedStore64, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>; +defm : X86WriteRes<WriteFMaskedStore64Y, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>; + defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>; defm : X86WriteRes<WriteFMoveX, [HWPort5], 1, [1], 1>; defm : X86WriteRes<WriteFMoveY, [HWPort5], 1, [1], 1>; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index d40bdf728a4..26d4d8fa354 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -208,8 +208,12 @@ defm : X86WriteRes<WriteFStoreY, [SBPort23,SBPort4], 1, [1,1], 1>; defm : X86WriteRes<WriteFStoreNT, [SBPort23,SBPort4], 1, [1,1], 1>; defm : X86WriteRes<WriteFStoreNTX, [SBPort23,SBPort4], 1, [1,1], 1>; defm : X86WriteRes<WriteFStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>; -defm : X86WriteRes<WriteFMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>; -defm : X86WriteRes<WriteFMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>; + +defm : X86WriteRes<WriteFMaskedStore32, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>; +defm : X86WriteRes<WriteFMaskedStore32Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>; +defm : X86WriteRes<WriteFMaskedStore64, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>; +defm : X86WriteRes<WriteFMaskedStore64Y, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>; + defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>; defm : X86WriteRes<WriteFMoveX, [SBPort5], 1, [1], 1>; defm : X86WriteRes<WriteFMoveY, [SBPort5], 1, [1], 1>; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 8f3e4ae62d5..9a511ecc007 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -226,8 +226,12 @@ defm : X86WriteRes<WriteFStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreNTX, [SKLPort237,SKLPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>; -defm : X86WriteRes<WriteFMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>; -defm : X86WriteRes<WriteFMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>; + +defm : X86WriteRes<WriteFMaskedStore32, [SKLPort237,SKLPort0], 2, [1,1], 2>; +defm : X86WriteRes<WriteFMaskedStore32Y, [SKLPort237,SKLPort0], 2, [1,1], 2>; +defm : X86WriteRes<WriteFMaskedStore64, [SKLPort237,SKLPort0], 2, [1,1], 2>; +defm : X86WriteRes<WriteFMaskedStore64Y, [SKLPort237,SKLPort0], 2, [1,1], 2>; + defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>; defm : X86WriteRes<WriteFMoveX, [SKLPort015], 1, [1], 1>; defm : X86WriteRes<WriteFMoveY, [SKLPort015], 1, [1], 1>; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 58caf1dacfc..a8c65435ab9 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -226,8 +226,12 @@ defm : X86WriteRes<WriteFStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreNTX, [SKXPort237,SKXPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteFStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>; -defm : X86WriteRes<WriteFMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>; -defm : X86WriteRes<WriteFMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>; + +defm : X86WriteRes<WriteFMaskedStore32, [SKXPort237,SKXPort0], 2, [1,1], 2>; +defm : X86WriteRes<WriteFMaskedStore32Y, [SKXPort237,SKXPort0], 2, [1,1], 2>; +defm : X86WriteRes<WriteFMaskedStore64, [SKXPort237,SKXPort0], 2, [1,1], 2>; +defm : X86WriteRes<WriteFMaskedStore64Y, [SKXPort237,SKXPort0], 2, [1,1], 2>; + defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>; defm : X86WriteRes<WriteFMoveX, [SKXPort015], 1, [1], 1>; defm : X86WriteRes<WriteFMoveY, [SKXPort015], 1, [1], 1>; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 55ca85ec1e3..95f710061ae 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -102,6 +102,12 @@ class X86SchedWriteMoveLS<SchedWrite MoveRR, SchedWrite MR = StoreMR; } +// Multiclass that wraps masked load/store writes for a vector width. +class X86SchedWriteMaskMove<SchedWrite LoadRM, SchedWrite StoreMR> { + SchedWrite RM = LoadRM; + SchedWrite MR = StoreMR; +} + // Multiclass that wraps X86SchedWriteMoveLS for each vector width. class X86SchedWriteMoveLSWidths<X86SchedWriteMoveLS sScl, X86SchedWriteMoveLS s128, @@ -218,8 +224,12 @@ def WriteFStoreY : SchedWrite; def WriteFStoreNT : SchedWrite; def WriteFStoreNTX : SchedWrite; def WriteFStoreNTY : SchedWrite; -def WriteFMaskedStore : SchedWrite; -def WriteFMaskedStoreY : SchedWrite; + +def WriteFMaskedStore32 : SchedWrite; +def WriteFMaskedStore64 : SchedWrite; +def WriteFMaskedStore32Y : SchedWrite; +def WriteFMaskedStore64Y : SchedWrite; + def WriteFMove : SchedWrite; def WriteFMoveX : SchedWrite; def WriteFMoveY : SchedWrite; @@ -530,6 +540,16 @@ def SchedWriteVecMoveLSNT : X86SchedWriteMoveLSWidths<WriteVecMoveLSNT, WriteVecMoveLSNTX, WriteVecMoveLSNTY, WriteVecMoveLSNTY>; +// Conditional SIMD Packed Loads and Stores wrappers. +def WriteFMaskMove32 + : X86SchedWriteMaskMove<WriteFMaskedLoad, WriteFMaskedStore32>; +def WriteFMaskMove64 + : X86SchedWriteMaskMove<WriteFMaskedLoad, WriteFMaskedStore64>; +def WriteFMaskMove32Y + : X86SchedWriteMaskMove<WriteFMaskedLoadY, WriteFMaskedStore32Y>; +def WriteFMaskMove64Y + : X86SchedWriteMaskMove<WriteFMaskedLoadY, WriteFMaskedStore64Y>; + // Vector width wrappers. def SchedWriteFAdd : X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddZ>; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index b0334655de7..78acb1065ec 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -216,8 +216,10 @@ defm : X86WriteResUnsupported<WriteFStoreY>; def : WriteRes<WriteFStoreNT, [AtomPort0]>; def : WriteRes<WriteFStoreNTX, [AtomPort0]>; defm : X86WriteResUnsupported<WriteFStoreNTY>; -defm : X86WriteResUnsupported<WriteFMaskedStore>; -defm : X86WriteResUnsupported<WriteFMaskedStoreY>; +defm : X86WriteResUnsupported<WriteFMaskedStore32>; +defm : X86WriteResUnsupported<WriteFMaskedStore32Y>; +defm : X86WriteResUnsupported<WriteFMaskedStore64>; +defm : X86WriteResUnsupported<WriteFMaskedStore64Y>; def : WriteRes<WriteFMove, [AtomPort01]>; def : WriteRes<WriteFMoveX, [AtomPort01]>; diff --git a/llvm/lib/Target/X86/X86ScheduleBdVer2.td b/llvm/lib/Target/X86/X86ScheduleBdVer2.td index 8cc01c3acec..d7aea3cf4e9 100644 --- a/llvm/lib/Target/X86/X86ScheduleBdVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBdVer2.td @@ -726,8 +726,10 @@ defm : PdWriteRes<WriteFStoreNT, [PdStore, PdFPU1, PdFPSTO], 3>; defm : PdWriteRes<WriteFStoreNTX, [PdStore, PdFPU1, PdFPSTO], 3>; defm : PdWriteRes<WriteFStoreNTY, [PdStore, PdFPU1, PdFPSTO], 3, [2, 2, 2], 4>; -defm : PdWriteRes<WriteFMaskedStore, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>; -defm : PdWriteRes<WriteFMaskedStoreY, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>; +defm : PdWriteRes<WriteFMaskedStore32, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>; +defm : PdWriteRes<WriteFMaskedStore64, [PdStore, PdFPU01, PdFPFMA], 6, [1, 1, 188], 18>; +defm : PdWriteRes<WriteFMaskedStore32Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>; +defm : PdWriteRes<WriteFMaskedStore64Y, [PdStore, PdFPU01, PdFPFMA], 6, [2, 2, 376], 34>; defm : PdWriteRes<WriteFMove, [PdFPU01, PdFPFMA]>; defm : PdWriteRes<WriteFMoveX, [PdFPU01, PdFPFMA], 1, [1, 2]>; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 32549bc06e0..3addf048dba 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -512,8 +512,11 @@ defm : X86WriteRes<WriteFStoreY, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>; defm : X86WriteRes<WriteFStoreNT, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>; defm : X86WriteRes<WriteFStoreNTX, [JSAGU, JFPU1, JSTC], 3, [1, 1, 1], 1>; defm : X86WriteRes<WriteFStoreNTY, [JSAGU, JFPU1, JSTC], 3, [2, 2, 2], 1>; -defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>; -defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>; + +defm : X86WriteRes<WriteFMaskedStore32, [JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01], 16, [1,1, 5, 5,4,4,4], 19>; +defm : X86WriteRes<WriteFMaskedStore64, [JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01], 13, [1,1, 2, 2,2,2,2], 10>; +defm : X86WriteRes<WriteFMaskedStore32Y, [JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01], 22, [1,1,10,10,8,8,8], 36>; +defm : X86WriteRes<WriteFMaskedStore64Y, [JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01], 16, [1,1, 4, 4,4,4,4], 18>; defm : X86WriteRes<WriteFMove, [JFPU01, JFPX], 1, [1, 1], 1>; defm : X86WriteRes<WriteFMoveX, [JFPU01, JFPX], 1, [1, 1], 1>; @@ -819,6 +822,18 @@ def JWriteJVZEROUPPER: SchedWriteRes<[]> { def : InstRW<[JWriteJVZEROUPPER], (instrs VZEROUPPER)>; /////////////////////////////////////////////////////////////////////////////// +// SSE2/AVX Store Selected Bytes of Double Quadword - (V)MASKMOVDQ +/////////////////////////////////////////////////////////////////////////////// + +def JWriteMASKMOVDQU: SchedWriteRes<[JFPU0, JFPA, JFPU1, JSTC, JLAGU, JSAGU, JALU01]> { + let Latency = 34; + let ResourceCycles = [1, 1, 2, 2, 2, 16, 42]; + let NumMicroOps = 63; +} +def : InstRW<[JWriteMASKMOVDQU], (instrs MASKMOVDQU, MASKMOVDQU64, + VMASKMOVDQU, VMASKMOVDQU64)>; + +/////////////////////////////////////////////////////////////////////////////// // SchedWriteVariant definitions. /////////////////////////////////////////////////////////////////////////////// diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 34c251a5c5b..8e3ce721f1a 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -186,8 +186,12 @@ def : WriteRes<WriteFStoreY, [SLM_MEC_RSV]>; def : WriteRes<WriteFStoreNT, [SLM_MEC_RSV]>; def : WriteRes<WriteFStoreNTX, [SLM_MEC_RSV]>; def : WriteRes<WriteFStoreNTY, [SLM_MEC_RSV]>; -def : WriteRes<WriteFMaskedStore, [SLM_MEC_RSV]>; -def : WriteRes<WriteFMaskedStoreY, [SLM_MEC_RSV]>; + +def : WriteRes<WriteFMaskedStore32, [SLM_MEC_RSV]>; +def : WriteRes<WriteFMaskedStore32Y, [SLM_MEC_RSV]>; +def : WriteRes<WriteFMaskedStore64, [SLM_MEC_RSV]>; +def : WriteRes<WriteFMaskedStore64Y, [SLM_MEC_RSV]>; + def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>; def : WriteRes<WriteFMoveX, [SLM_FPC_RSV01]>; def : WriteRes<WriteFMoveY, [SLM_FPC_RSV01]>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 65f6d89df61..06201f4a3a8 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -268,8 +268,12 @@ defm : X86WriteRes<WriteFStoreY, [ZnAGU], 1, [1], 1>; defm : X86WriteRes<WriteFStoreNT, [ZnAGU,ZnFPU2], 8, [1,1], 1>; defm : X86WriteRes<WriteFStoreNTX, [ZnAGU], 1, [1], 1>; defm : X86WriteRes<WriteFStoreNTY, [ZnAGU], 1, [1], 1>; -defm : X86WriteRes<WriteFMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>; -defm : X86WriteRes<WriteFMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>; + +defm : X86WriteRes<WriteFMaskedStore32, [ZnAGU,ZnFPU01], 4, [1,1], 1>; +defm : X86WriteRes<WriteFMaskedStore32Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>; +defm : X86WriteRes<WriteFMaskedStore64, [ZnAGU,ZnFPU01], 4, [1,1], 1>; +defm : X86WriteRes<WriteFMaskedStore64Y, [ZnAGU,ZnFPU01], 5, [1,2], 2>; + defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>; defm : X86WriteRes<WriteFMoveX, [ZnFPU], 1, [1], 1>; defm : X86WriteRes<WriteFMoveY, [ZnFPU], 1, [1], 1>; diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s index 5b9c1dd66e9..1378a115b88 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s @@ -1219,15 +1219,15 @@ vzeroupper # CHECK-NEXT: 1 5 1.00 * vlddqu (%rax), %xmm2 # CHECK-NEXT: 1 5 1.00 * vlddqu (%rax), %ymm2 # CHECK-NEXT: 1 3 1.00 * U vldmxcsr (%rax) -# CHECK-NEXT: 1 1 1.00 * * U vmaskmovdqu %xmm0, %xmm1 +# CHECK-NEXT: 63 34 21.00 * * U vmaskmovdqu %xmm0, %xmm1 # CHECK-NEXT: 1 6 1.00 * vmaskmovpd (%rax), %xmm0, %xmm2 # CHECK-NEXT: 2 6 2.00 * vmaskmovpd (%rax), %ymm0, %ymm2 -# CHECK-NEXT: 1 6 2.00 * * vmaskmovpd %xmm0, %xmm1, (%rax) -# CHECK-NEXT: 2 6 2.00 * * vmaskmovpd %ymm0, %ymm1, (%rax) +# CHECK-NEXT: 10 13 2.00 * * vmaskmovpd %xmm0, %xmm1, (%rax) +# CHECK-NEXT: 18 16 4.00 * * vmaskmovpd %ymm0, %ymm1, (%rax) # CHECK-NEXT: 1 6 1.00 * vmaskmovps (%rax), %xmm0, %xmm2 # CHECK-NEXT: 2 6 2.00 * vmaskmovps (%rax), %ymm0, %ymm2 -# CHECK-NEXT: 1 6 2.00 * * vmaskmovps %xmm0, %xmm1, (%rax) -# CHECK-NEXT: 2 6 2.00 * * vmaskmovps %ymm0, %ymm1, (%rax) +# CHECK-NEXT: 19 16 5.00 * * vmaskmovps %xmm0, %xmm1, (%rax) +# CHECK-NEXT: 36 22 10.00 * * vmaskmovps %ymm0, %ymm1, (%rax) # CHECK-NEXT: 1 2 1.00 vmaxpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 1.00 * vmaxpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 2 2.00 vmaxpd %ymm0, %ymm1, %ymm2 @@ -1740,7 +1740,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: 56.00 - - 365.00 915.00 447.50 461.50 394.00 - 51.00 132.00 135.50 159.50 38.00 +# CHECK-NEXT: 86.00 30.00 - 362.00 907.00 449.50 480.50 414.00 - 78.00 154.00 135.50 159.50 38.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: @@ -1933,15 +1933,15 @@ vzeroupper # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vlddqu (%rax), %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vlddqu (%rax), %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - - - - - vldmxcsr (%rax) -# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmaskmovdqu %xmm0, %xmm1 +# CHECK-NEXT: 21.00 21.00 - 1.00 - 1.00 2.00 2.00 - 16.00 2.00 - - - vmaskmovdqu %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 1.00 - - - - - - vmaskmovpd (%rax), %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 2.00 - - - - - - vmaskmovpd (%rax), %ymm0, %ymm2 -# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - 1.00 - - - - vmaskmovpd %xmm0, %xmm1, (%rax) -# CHECK-NEXT: - - - 2.00 2.00 1.00 1.00 - - 2.00 - - - - vmaskmovpd %ymm0, %ymm1, (%rax) +# CHECK-NEXT: 1.00 1.00 - 1.00 - 1.00 2.00 2.00 - 2.00 2.00 - - - vmaskmovpd %xmm0, %xmm1, (%rax) +# CHECK-NEXT: 2.00 2.00 - 1.00 - 1.00 4.00 4.00 - 4.00 4.00 - - - vmaskmovpd %ymm0, %ymm1, (%rax) # CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 1.00 - - - - - - vmaskmovps (%rax), %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 2.00 2.00 2.00 2.00 - - - - - - vmaskmovps (%rax), %ymm0, %ymm2 -# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - 1.00 - - - - vmaskmovps %xmm0, %xmm1, (%rax) -# CHECK-NEXT: - - - 2.00 2.00 1.00 1.00 - - 2.00 - - - - vmaskmovps %ymm0, %ymm1, (%rax) +# CHECK-NEXT: 2.00 2.00 - 1.00 - 1.00 5.00 4.00 - 4.00 5.00 - - - vmaskmovps %xmm0, %xmm1, (%rax) +# CHECK-NEXT: 4.00 4.00 - 1.00 - 1.00 10.00 8.00 - 8.00 10.00 - - - vmaskmovps %ymm0, %ymm1, (%rax) # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vmaxpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vmaxpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vmaxpd %ymm0, %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s index c9d7f32d4e0..924066ae87b 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s @@ -465,7 +465,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 19 19.00 divsd %xmm0, %xmm2 # CHECK-NEXT: 1 24 19.00 * divsd (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 * * U lfence -# CHECK-NEXT: 1 1 1.00 * * U maskmovdqu %xmm0, %xmm1 +# CHECK-NEXT: 63 34 21.00 * * U maskmovdqu %xmm0, %xmm1 # CHECK-NEXT: 1 2 1.00 maxpd %xmm0, %xmm2 # CHECK-NEXT: 1 7 1.00 * maxpd (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 maxsd %xmm0, %xmm2 @@ -693,7 +693,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: 17.00 - - 49.00 204.00 128.50 141.50 118.00 - 16.00 54.00 67.50 67.50 12.00 +# CHECK-NEXT: 38.00 21.00 - 50.00 204.00 129.50 142.50 120.00 - 31.00 55.00 67.50 67.50 12.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: @@ -755,7 +755,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - - 19.00 - 1.00 - - - - - - - divsd %xmm0, %xmm2 # CHECK-NEXT: - - - - 19.00 - 1.00 1.00 - - - - - - divsd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - - - 1.00 - - - - lfence -# CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - maskmovdqu %xmm0, %xmm1 +# CHECK-NEXT: 21.00 21.00 - 1.00 - 1.00 2.00 2.00 - 16.00 2.00 - - - maskmovdqu %xmm0, %xmm1 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - maxpd %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - maxpd (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - maxsd %xmm0, %xmm2 |