diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 16 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 48 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 53 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 46 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 42 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 44 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 20 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleAtom.td | 20 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 46 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 22 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 53 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-schedule.ll | 16 |
12 files changed, 171 insertions, 255 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index d5630e58a02..b52ca8b3fef 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7102,22 +7102,22 @@ multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr, (ins VR128:$src1, f128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>, - VEX_4V, Sched<[WriteFLoad]>; + VEX_4V, Sched<[WriteFMaskedLoad]>; def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, - VEX_4V, VEX_L, Sched<[WriteFLoad]>; + VEX_4V, VEX_L, Sched<[WriteFMaskedLoadY]>; def mr : AVX8I<opc_mr, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, - VEX_4V, Sched<[WriteFStore]>; + VEX_4V, Sched<[WriteFMaskedStore]>; def Ymr : AVX8I<opc_mr, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src1, VR256:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, - VEX_4V, VEX_L, Sched<[WriteFStore]>; + VEX_4V, VEX_L, Sched<[WriteFMaskedStoreY]>; } let ExeDomain = SSEPackedSingle in @@ -7729,22 +7729,22 @@ multiclass avx2_pmovmask<string OpcodeStr, (ins VR128:$src1, i128mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, - VEX_4V, Sched<[WriteVecLoad]>; + VEX_4V, Sched<[WriteVecMaskedLoad]>; def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, - VEX_4V, VEX_L, Sched<[WriteVecLoad]>; + VEX_4V, VEX_L, Sched<[WriteVecMaskedLoadY]>; def mr : AVX28I<0x8e, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, - VEX_4V, Sched<[WriteVecStore]>; + VEX_4V, Sched<[WriteVecMaskedStore]>; def Ymr : AVX28I<0x8e, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src1, VR256:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, - VEX_4V, VEX_L, Sched<[WriteVecStore]>; + VEX_4V, VEX_L, Sched<[WriteVecMaskedStoreY]>; } defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd", diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 59f45c23041..4549856acb4 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -151,9 +151,13 @@ def : InstRW<[WriteMove], (instrs COPY)>; defm : BWWriteResPair<WriteJump, [BWPort06], 1>; // Floating point. This covers both scalar and vector operations. -def : WriteRes<WriteFLoad, [BWPort23]> { let Latency = 5; } -def : WriteRes<WriteFStore, [BWPort237, BWPort4]>; -def : WriteRes<WriteFMove, [BWPort5]>; +defm : X86WriteRes<WriteFLoad, [BWPort23], 5, [1], 1>; +defm : X86WriteRes<WriteFMaskedLoad, [BWPort23,BWPort5], 7, [1,2], 3>; +defm : X86WriteRes<WriteFMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>; +defm : X86WriteRes<WriteFStore, [BWPort237,BWPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteFMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>; +defm : X86WriteRes<WriteFMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>; +defm : X86WriteRes<WriteFMove, [BWPort5], 1, [1], 1>; defm : BWWriteResPair<WriteFAdd, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub. defm : BWWriteResPair<WriteFAddX, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub (XMM). @@ -241,10 +245,14 @@ def : WriteRes<WriteCvtF2FSt, [BWPort1,BWPort4,BWPort237]> { // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } // Vector integer operations. -def : WriteRes<WriteVecLoad, [BWPort23]> { let Latency = 5; } -def : WriteRes<WriteVecStore, [BWPort237, BWPort4]>; -def : WriteRes<WriteVecMove, [BWPort015]>; -defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>; +defm : X86WriteRes<WriteVecLoad, [BWPort23], 5, [1], 1>; +defm : X86WriteRes<WriteVecMaskedLoad, [BWPort23,BWPort5], 7, [1,2], 3>; +defm : X86WriteRes<WriteVecMaskedLoadY, [BWPort23,BWPort5], 8, [1,2], 3>; +defm : X86WriteRes<WriteVecStore, [BWPort237,BWPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteVecMaskedStore, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>; +defm : X86WriteRes<WriteVecMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5, [1,1,1,1], 4>; +defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>; +defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>; defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals. defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM). @@ -899,16 +907,6 @@ def BWWriteResGroup52 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { } def: InstRW<[BWWriteResGroup52], (instrs IMUL32r, MUL32r, MULX32rr)>; -def BWWriteResGroup53 : SchedWriteRes<[BWPort0,BWPort4,BWPort237,BWPort15]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[BWWriteResGroup53], (instregex "VMASKMOVPD(Y?)mr", - "VMASKMOVPS(Y?)mr", - "VPMASKMOVD(Y?)mr", - "VPMASKMOVQ(Y?)mr")>; - def BWWriteResGroup54 : SchedWriteRes<[BWPort6,BWPort0156]> { let Latency = 5; let NumMicroOps = 5; @@ -1107,11 +1105,7 @@ def BWWriteResGroup79 : SchedWriteRes<[BWPort5,BWPort23]> { } def: InstRW<[BWWriteResGroup79], (instregex "MMX_PACKSSDWirm", "MMX_PACKSSWBirm", - "MMX_PACKUSWBirm", - "VMASKMOVPDrm", - "VMASKMOVPSrm", - "VPMASKMOVDrm", - "VPMASKMOVQrm")>; + "MMX_PACKUSWBirm")>; def BWWriteResGroup80 : SchedWriteRes<[BWPort23,BWPort0156]> { let Latency = 7; @@ -1212,16 +1206,6 @@ def: InstRW<[BWWriteResGroup92], (instregex "VPMOVSXBDYrm", "VPMOVSXWQYrm", "VPMOVZXWDYrm")>; -def BWWriteResGroup94 : SchedWriteRes<[BWPort5,BWPort23]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[BWWriteResGroup94], (instregex "VMASKMOVPDYrm", - "VMASKMOVPSYrm", - "VPMASKMOVDYrm", - "VPMASKMOVQYrm")>; - def BWWriteResGroup97 : SchedWriteRes<[BWPort23,BWPort237,BWPort06,BWPort0156]> { let Latency = 8; let NumMicroOps = 5; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index d682edf8dd3..ad626bbc6c3 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -143,11 +143,16 @@ defm : HWWriteResPair<WriteBZHI, [HWPort15], 1>; // This is quite rough, latency depends on the dividend. defm : HWWriteResPair<WriteIDiv, [HWPort0, HWDivider], 25, [1,10], 1, 4>; + // Scalar and vector floating point. -def : WriteRes<WriteFStore, [HWPort237, HWPort4]>; -def : WriteRes<WriteFLoad, [HWPort23]> { let Latency = 5; } -def : WriteRes<WriteFMove, [HWPort5]>; -defm : X86WriteRes<WriteEMMS, [HWPort01,HWPort15,HWPort015,HWPort0156], 31, [8,1,21,1], 31>; +defm : X86WriteRes<WriteFLoad, [HWPort23], 5, [1], 1>; +defm : X86WriteRes<WriteFMaskedLoad, [HWPort23,HWPort5], 8, [1,2], 3>; +defm : X86WriteRes<WriteFMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>; +defm : X86WriteRes<WriteFStore, [HWPort237,HWPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteFMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>; +defm : X86WriteRes<WriteFMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>; +defm : X86WriteRes<WriteFMove, [HWPort5], 1, [1], 1>; +defm : X86WriteRes<WriteEMMS, [HWPort01,HWPort15,HWPort015,HWPort0156], 31, [8,1,21,1], 31>; defm : HWWriteResPair<WriteFAdd, [HWPort1], 3, [1], 1, 5>; defm : HWWriteResPair<WriteFAddX, [HWPort1], 3, [1], 1, 6>; @@ -235,9 +240,13 @@ def : WriteRes<WriteCvtF2FSt, [HWPort1,HWPort4,HWPort5,HWPort237]> { } // Vector integer operations. -def : WriteRes<WriteVecStore, [HWPort237, HWPort4]>; -def : WriteRes<WriteVecLoad, [HWPort23]> { let Latency = 5; } -def : WriteRes<WriteVecMove, [HWPort015]>; +defm : X86WriteRes<WriteVecLoad, [HWPort23], 5, [1], 1>; +defm : X86WriteRes<WriteVecMaskedLoad, [HWPort23,HWPort5], 8, [1,2], 3>; +defm : X86WriteRes<WriteVecMaskedLoadY, [HWPort23,HWPort5], 9, [1,2], 3>; +defm : X86WriteRes<WriteVecStore, [HWPort237,HWPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteVecMaskedStore, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>; +defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5, [1,1,1,1], 4>; +defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>; defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>; defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>; @@ -1156,26 +1165,6 @@ def: InstRW<[HWWriteResGroup35], (instregex "ADC(8|16|32|64)ri", "SBB(8|16|32|64)i", "SET(A|BE)r")>; -def HWWriteResGroup36 : SchedWriteRes<[HWPort5,HWPort23]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[HWWriteResGroup36], (instregex "VMASKMOVPDrm", - "VMASKMOVPSrm", - "VPMASKMOVDrm", - "VPMASKMOVQrm")>; - -def HWWriteResGroup36_1 : SchedWriteRes<[HWPort5,HWPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[HWWriteResGroup36_1], (instregex "VMASKMOVPDYrm", - "VMASKMOVPSYrm", - "VPMASKMOVDYrm", - "VPMASKMOVQYrm")>; - def HWWriteResGroup36_2 : SchedWriteRes<[HWPort5,HWPort23]> { let Latency = 7; let NumMicroOps = 3; @@ -1579,16 +1568,6 @@ def HWWriteResGroup83 : SchedWriteRes<[HWPort1,HWPort6,HWPort0156]> { } def: InstRW<[HWWriteResGroup83], (instregex "LAR(16|32|64)rr")>; -def HWWriteResGroup84 : SchedWriteRes<[HWPort0,HWPort4,HWPort237,HWPort15]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[HWWriteResGroup84], (instregex "VMASKMOVPD(Y?)mr", - "VMASKMOVPS(Y?)mr", - "VPMASKMOVD(Y?)mr", - "VPMASKMOVQ(Y?)mr")>; - def HWWriteResGroup86 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort0156]> { let Latency = 10; let NumMicroOps = 4; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index db6dc073118..a9b82c9f5ef 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -134,10 +134,14 @@ defm : SBWriteResPair<WriteBEXTR, [SBPort05,SBPort1], 2, [1,1], 2>; defm : SBWriteResPair<WriteBZHI, [SBPort1], 1>; // Scalar and vector floating point. -def : WriteRes<WriteFStore, [SBPort23, SBPort4]>; -def : WriteRes<WriteFLoad, [SBPort23]> { let Latency = 6; } -def : WriteRes<WriteFMove, [SBPort5]>; -defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>; +defm : X86WriteRes<WriteFLoad, [SBPort23], 6, [1], 1>; +defm : X86WriteRes<WriteFMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>; +defm : X86WriteRes<WriteFMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>; +defm : X86WriteRes<WriteFStore, [SBPort23,SBPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteFMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>; +defm : X86WriteRes<WriteFMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>; +defm : X86WriteRes<WriteFMove, [SBPort5], 1, [1], 1>; +defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>; defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 6>; defm : SBWriteResPair<WriteFAddX, [SBPort1], 3, [1], 1, 6>; @@ -213,9 +217,13 @@ defm : SBWriteResPair<WriteFVarBlendY,[SBPort05], 2, [2], 2, 7>; def : WriteRes<WriteCvtF2FSt, [SBPort1, SBPort23, SBPort4]> { let Latency = 4; } // Vector integer operations. -def : WriteRes<WriteVecStore, [SBPort23, SBPort4]>; -def : WriteRes<WriteVecLoad, [SBPort23]> { let Latency = 6; } -def : WriteRes<WriteVecMove, [SBPort05]>; +defm : X86WriteRes<WriteVecLoad, [SBPort23], 6, [1], 1>; +defm : X86WriteRes<WriteVecMaskedLoad, [SBPort23,SBPort05], 8, [1,2], 3>; +defm : X86WriteRes<WriteVecMaskedLoadY, [SBPort23,SBPort05], 9, [1,2], 3>; +defm : X86WriteRes<WriteVecStore, [SBPort23,SBPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteVecMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>; +defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>; +defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>; defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>; defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>; @@ -786,14 +794,6 @@ def: InstRW<[SBWriteResGroup36], (instregex "CALL64pcrel32", "CALL(16|32|64)r", "(V?)EXTRACTPSmr")>; -def SBWriteResGroup37 : SchedWriteRes<[SBPort4,SBPort01,SBPort23]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPD(Y?)mr", - "VMASKMOVPS(Y?)mr")>; - def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { let Latency = 5; let NumMicroOps = 3; @@ -1060,14 +1060,6 @@ def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> { } def: InstRW<[SBWriteResGroup72], (instrs MUL8m)>; -def SBWriteResGroup75 : SchedWriteRes<[SBPort23,SBPort05]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SBWriteResGroup75], (instregex "VMASKMOVPDrm", - "VMASKMOVPSrm")>; - def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { let Latency = 8; let NumMicroOps = 3; @@ -1169,14 +1161,6 @@ def: InstRW<[SBWriteResGroup90], (instregex "MMX_CVTPS2PIirm", "(V?)CVTPS2DQrm", "(V?)CVTTPS2DQrm")>; -def SBWriteResGroup91 : SchedWriteRes<[SBPort23,SBPort05]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SBWriteResGroup91], (instregex "VMASKMOVPDYrm", - "VMASKMOVPSYrm")>; - def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { let Latency = 9; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index c52914357ed..b2e41582163 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -147,10 +147,14 @@ def : WriteRes<WriteZero, []>; defm : SKLWriteResPair<WriteJump, [SKLPort06], 1>; // Floating point. This covers both scalar and vector operations. -def : WriteRes<WriteFLoad, [SKLPort23]> { let Latency = 6; } -def : WriteRes<WriteFStore, [SKLPort237, SKLPort4]>; -def : WriteRes<WriteFMove, [SKLPort015]>; -defm : X86WriteRes<WriteEMMS, [SKLPort05,SKLPort0156], 10, [9,1], 10>; +defm : X86WriteRes<WriteFLoad, [SKLPort23], 6, [1], 1>; +defm : X86WriteRes<WriteFMaskedLoad, [SKLPort23,SKLPort015], 7, [1,1], 2>; +defm : X86WriteRes<WriteFMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>; +defm : X86WriteRes<WriteFStore, [SKLPort237,SKLPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteFMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>; +defm : X86WriteRes<WriteFMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>; +defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>; +defm : X86WriteRes<WriteEMMS, [SKLPort05,SKLPort0156], 10, [9,1], 10>; defm : SKLWriteResPair<WriteFAdd, [SKLPort01], 4, [1], 1, 5>; // Floating point add/sub. defm : SKLWriteResPair<WriteFAddX, [SKLPort01], 4, [1], 1, 6>; // Floating point add/sub (XMM). @@ -234,9 +238,13 @@ def : WriteRes<WriteCvtF2FSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01]> { // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } // Vector integer operations. -def : WriteRes<WriteVecLoad, [SKLPort23]> { let Latency = 6; } -def : WriteRes<WriteVecStore, [SKLPort237, SKLPort4]>; -def : WriteRes<WriteVecMove, [SKLPort015]>; +defm : X86WriteRes<WriteVecLoad, [SKLPort23], 6, [1], 1>; +defm : X86WriteRes<WriteVecMaskedLoad, [SKLPort23,SKLPort015], 7, [1,1], 2>; +defm : X86WriteRes<WriteVecMaskedLoadY, [SKLPort23,SKLPort015], 8, [1,1], 2>; +defm : X86WriteRes<WriteVecStore, [SKLPort237,SKLPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteVecMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>; +defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>; +defm : X86WriteRes<WriteVecMove, [SKLPort015], 1, [1], 1>; defm : SKLWriteResPair<WriteVecALU, [SKLPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals. defm : SKLWriteResPair<WriteVecALUY, [SKLPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM). @@ -646,16 +654,6 @@ def: InstRW<[SKLWriteResGroup17], (instrs LFENCE, WAIT, XGETBV)>; -def SKLWriteResGroup18 : SchedWriteRes<[SKLPort0,SKLPort237]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup18], (instregex "VMASKMOVPD(Y?)mr", - "VMASKMOVPS(Y?)mr", - "VPMASKMOVD(Y?)mr", - "VPMASKMOVQ(Y?)mr")>; - def SKLWriteResGroup20 : SchedWriteRes<[SKLPort6,SKLPort0156]> { let Latency = 2; let NumMicroOps = 2; @@ -1246,15 +1244,11 @@ def SKLWriteResGroup91 : SchedWriteRes<[SKLPort23,SKLPort015]> { } def: InstRW<[SKLWriteResGroup91], (instregex "(V?)INSERTF128rm", "(V?)INSERTI128rm", - "(V?)MASKMOVPDrm", - "(V?)MASKMOVPSrm", "(V?)PADDBrm", "(V?)PADDDrm", "(V?)PADDQrm", "(V?)PADDWrm", "(V?)PBLENDDrmi", - "(V?)PMASKMOVDrm", - "(V?)PMASKMOVQrm", "(V?)PSUBBrm", "(V?)PSUBDrm", "(V?)PSUBQrm", @@ -1382,15 +1376,11 @@ def SKLWriteResGroup110 : SchedWriteRes<[SKLPort23,SKLPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup110], (instregex "VMASKMOVPDYrm", - "VMASKMOVPSYrm", - "VPADDBYrm", +def: InstRW<[SKLWriteResGroup110], (instregex "VPADDBYrm", "VPADDDYrm", "VPADDQYrm", "VPADDWYrm", "VPBLENDDYrmi", - "VPMASKMOVDYrm", - "VPMASKMOVQYrm", "VPSUBBYrm", "VPSUBDYrm", "VPSUBQYrm", diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index e05ced17a44..5ffa0f5d5ae 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -147,10 +147,14 @@ def : WriteRes<WriteZero, []>; defm : SKXWriteResPair<WriteJump, [SKXPort06], 1>; // Floating point. This covers both scalar and vector operations. -def : WriteRes<WriteFLoad, [SKXPort23]> { let Latency = 5; } -def : WriteRes<WriteFStore, [SKXPort237, SKXPort4]>; -def : WriteRes<WriteFMove, [SKXPort015]>; -defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>; +defm : X86WriteRes<WriteFLoad, [SKXPort23], 5, [1], 1>; +defm : X86WriteRes<WriteFMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>; +defm : X86WriteRes<WriteFMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>; +defm : X86WriteRes<WriteFStore, [SKXPort237,SKXPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteFMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>; +defm : X86WriteRes<WriteFMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>; +defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>; +defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>; defm : SKXWriteResPair<WriteFAdd, [SKXPort015], 4, [1], 1, 5>; // Floating point add/sub. defm : SKXWriteResPair<WriteFAddX, [SKXPort015], 4, [1], 1, 6>; // Floating point add/sub (XMM). @@ -234,9 +238,13 @@ def : WriteRes<WriteCvtF2FSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015]> { // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } // Vector integer operations. -def : WriteRes<WriteVecLoad, [SKXPort23]> { let Latency = 5; } -def : WriteRes<WriteVecStore, [SKXPort237, SKXPort4]>; -def : WriteRes<WriteVecMove, [SKXPort015]>; +defm : X86WriteRes<WriteVecLoad, [SKXPort23], 5, [1], 1>; +defm : X86WriteRes<WriteVecMaskedLoad, [SKXPort23,SKXPort015], 7, [1,1], 2>; +defm : X86WriteRes<WriteVecMaskedLoadY, [SKXPort23,SKXPort015], 8, [1,1], 2>; +defm : X86WriteRes<WriteVecStore, [SKXPort237,SKXPort4], 1, [1,1], 1>; +defm : X86WriteRes<WriteVecMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>; +defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>; +defm : X86WriteRes<WriteVecMove, [SKXPort015], 1, [1], 1>; defm : SKXWriteResPair<WriteVecALU, [SKXPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals. defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM). @@ -845,20 +853,6 @@ def: InstRW<[SKXWriteResGroup17], (instrs LFENCE, WAIT, XGETBV)>; -def SKXWriteResGroup18 : SchedWriteRes<[SKXPort0,SKXPort237]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKXWriteResGroup18], (instregex "VMASKMOVPDYmr", - "VMASKMOVPDmr", - "VMASKMOVPSYmr", - "VMASKMOVPSmr", - "VPMASKMOVDYmr", - "VPMASKMOVDmr", - "VPMASKMOVQYmr", - "VPMASKMOVQmr")>; - def SKXWriteResGroup20 : SchedWriteRes<[SKXPort6,SKXPort0156]> { let Latency = 2; let NumMicroOps = 2; @@ -1936,8 +1930,6 @@ def: InstRW<[SKXWriteResGroup95], (instregex "VBLENDMPDZ128rm(b?)", "VBROADCASTSSZ128m(b?)", "VINSERTF128rm", "VINSERTI128rm", - "VMASKMOVPDrm", - "VMASKMOVPSrm", "VMOVAPDZ128rm(b?)", "VMOVAPSZ128rm(b?)", "VMOVDDUPZ128rm(b?)", @@ -1967,8 +1959,6 @@ def: InstRW<[SKXWriteResGroup95], (instregex "VBLENDMPDZ128rm(b?)", "VPBLENDMWZ128rm(b?)", "VPBROADCASTDZ128m(b?)", "VPBROADCASTQZ128m(b?)", - "VPMASKMOVDrm", - "VPMASKMOVQrm", "VPSUBBZ128rm(b?)", "(V?)PSUBBrm", "VPSUBDZ128rm(b?)", @@ -2226,8 +2216,6 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)", "VINSERTI64x2Z256rm(b?)", "VINSERTI64x2Zrm(b?)", "VINSERTI64x4Zrm(b?)", - "VMASKMOVPDYrm", - "VMASKMOVPSYrm", "VMOVAPDZ256rm(b?)", "VMOVAPDZrm(b?)", "VMOVAPSZ256rm(b?)", @@ -2280,8 +2268,6 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)", "VPBROADCASTDZm(b?)", "VPBROADCASTQZ256m(b?)", "VPBROADCASTQZm(b?)", - "VPMASKMOVDYrm", - "VPMASKMOVQYrm", "VPSUBBYrm", "VPSUBBZ256rm(b?)", "VPSUBBZrm(b?)", diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 955e8b625a4..5ab237903f0 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -105,9 +105,13 @@ def WriteZero : SchedWrite; defm WriteJump : X86SchedWritePair; // Floating point. This covers both scalar and vector operations. -def WriteFLoad : SchedWrite; -def WriteFStore : SchedWrite; -def WriteFMove : SchedWrite; +def WriteFLoad : SchedWrite; +def WriteFMaskedLoad : SchedWrite; +def WriteFMaskedLoadY : SchedWrite; +def WriteFStore : SchedWrite; +def WriteFMaskedStore : SchedWrite; +def WriteFMaskedStoreY : SchedWrite; +def WriteFMove : SchedWrite; defm WriteFAdd : X86SchedWritePair; // Floating point add/sub. defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM). @@ -183,9 +187,13 @@ defm WritePHAdd : X86SchedWritePair; defm WritePHAddY : X86SchedWritePair; // YMM/ZMM. // Vector integer operations. -def WriteVecLoad : SchedWrite; -def WriteVecStore : SchedWrite; -def WriteVecMove : SchedWrite; +def WriteVecLoad : SchedWrite; +def WriteVecMaskedLoad : SchedWrite; +def WriteVecMaskedLoadY : SchedWrite; +def WriteVecStore : SchedWrite; +def WriteVecMaskedStore : SchedWrite; +def WriteVecMaskedStoreY : SchedWrite; +def WriteVecMove : SchedWrite; defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM). diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 3582e877b0b..c68dfcccaa0 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -198,8 +198,14 @@ def : WriteRes<WriteNop, [AtomPort01]>; // Floating point. This covers both scalar and vector operations. //////////////////////////////////////////////////////////////////////////////// -def : WriteRes<WriteFLoad, [AtomPort0]>; -def : WriteRes<WriteFStore, [AtomPort0]>; +def : WriteRes<WriteFLoad, [AtomPort0]>; +def : WriteRes<WriteFMaskedLoad, [AtomPort0]>; +def : WriteRes<WriteFMaskedLoadY, [AtomPort0]>; + +def : WriteRes<WriteFStore, [AtomPort0]>; +def : WriteRes<WriteFMaskedStore, [AtomPort0]>; +def : WriteRes<WriteFMaskedStoreY, [AtomPort0]>; + def : WriteRes<WriteFMove, [AtomPort01]>; defm : X86WriteRes<WriteEMMS,[AtomPort01], 5, [5], 1>; @@ -282,8 +288,14 @@ def : WriteRes<WriteCvtF2FSt, [AtomPort0]>; // NOTE: Doesn't exist on Atom. // Vector integer operations. //////////////////////////////////////////////////////////////////////////////// -def : WriteRes<WriteVecLoad, [AtomPort0]>; -def : WriteRes<WriteVecStore, [AtomPort0]>; +def : WriteRes<WriteVecLoad, [AtomPort0]>; +def : WriteRes<WriteVecMaskedLoad, [AtomPort0]>; +def : WriteRes<WriteVecMaskedLoadY, [AtomPort0]>; + +def : WriteRes<WriteVecStore, [AtomPort0]>; +def : WriteRes<WriteVecMaskedStore, [AtomPort0]>; +def : WriteRes<WriteVecMaskedStoreY, [AtomPort0]>; + def : WriteRes<WriteVecMove, [AtomPort01]>; defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 2f490e2c139..a38a2671dc6 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -311,8 +311,14 @@ def : WriteRes<WriteNop, [JALU01]> { let Latency = 1; } // Floating point. This covers both scalar and vector operations. //////////////////////////////////////////////////////////////////////////////// -def : WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX]> { let Latency = 5; } -def : WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC]>; +defm : X86WriteRes<WriteFLoad, [JLAGU, JFPU01, JFPX], 5, [1, 1, 1], 1>; +defm : X86WriteRes<WriteFMaskedLoad, [JLAGU, JFPU01, JFPX], 6, [1, 1, 2], 1>; +defm : X86WriteRes<WriteFMaskedLoadY, [JLAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>; + +defm : X86WriteRes<WriteFStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>; +defm : X86WriteRes<WriteFMaskedStore, [JSAGU, JFPU01, JFPX], 6, [1, 1, 4], 1>; +defm : X86WriteRes<WriteFMaskedStoreY, [JSAGU, JFPU01, JFPX], 6, [2, 2, 4], 2>; + def : WriteRes<WriteFMove, [JFPU01, JFPX]>; def : WriteRes<WriteEMMS, [JFPU01, JFPX]> { let Latency = 2; } @@ -434,8 +440,14 @@ def : InstRW<[JWriteCVTSI2FLd], (instregex "(V)?CVTSI(64)?2S(D|S)rm")>; // Vector integer operations. //////////////////////////////////////////////////////////////////////////////// -def : WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU]> { let Latency = 5; } -def : WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC]>; +defm : X86WriteRes<WriteVecLoad, [JLAGU, JFPU01, JVALU], 5, [1, 1, 1], 1>; +defm : X86WriteRes<WriteVecMaskedLoad, [JLAGU, JFPU01, JVALU], 6, [1, 1, 2], 1>; +defm : X86WriteRes<WriteVecMaskedLoadY, [JLAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>; + +defm : X86WriteRes<WriteVecStore, [JSAGU, JFPU1, JSTC], 1, [1, 1, 1], 1>; +defm : X86WriteRes<WriteVecMaskedStore, [JSAGU, JFPU01, JVALU], 6, [1, 1, 4], 1>; +defm : X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], 2>; + def : WriteRes<WriteVecMove, [JFPU01, JVALU]>; defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>; @@ -622,32 +634,6 @@ def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm, VBROADCASTSSYrm)>; -def JWriteVMaskMovLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { - let Latency = 6; - let ResourceCycles = [1, 1, 2]; -} -def : InstRW<[JWriteVMaskMovLd], (instrs VMASKMOVPDrm, VMASKMOVPSrm)>; - -def JWriteVMaskMovYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { - let Latency = 6; - let ResourceCycles = [2, 2, 4]; - let NumMicroOps = 2; -} -def : InstRW<[JWriteVMaskMovYLd], (instrs VMASKMOVPDYrm, VMASKMOVPSYrm)>; - -def JWriteVMaskMovSt: SchedWriteRes<[JFPU01, JFPX, JSAGU]> { - let Latency = 6; - let ResourceCycles = [1, 4, 1]; -} -def : InstRW<[JWriteVMaskMovSt], (instrs VMASKMOVPDmr, VMASKMOVPSmr)>; - -def JWriteVMaskMovYSt: SchedWriteRes<[JFPU01, JFPX, JSAGU]> { - let Latency = 6; - let ResourceCycles = [2, 4, 2]; - let NumMicroOps = 2; -} -def : InstRW<[JWriteVMaskMovYSt], (instrs VMASKMOVPDYmr, VMASKMOVPSYmr)>; - def JWriteJVZEROALL: SchedWriteRes<[]> { let Latency = 90; let NumMicroOps = 73; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 51acffb0bb8..2f814159a89 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -125,10 +125,14 @@ defm : SLMWriteResPair<WriteBZHI, [SLM_IEC_RSV0], 1>; defm : SLMWriteResPair<WriteIDiv, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>; // Scalar and vector floating point. -def : WriteRes<WriteFStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>; -def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; } -def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>; -defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>; +def : WriteRes<WriteFLoad, [SLM_MEC_RSV]> { let Latency = 3; } +def : WriteRes<WriteFMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; } +def : WriteRes<WriteFMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; } +def : WriteRes<WriteFStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>; +def : WriteRes<WriteFMaskedStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>; +def : WriteRes<WriteFMaskedStoreY, [SLM_FPC_RSV01, SLM_MEC_RSV]>; +def : WriteRes<WriteFMove, [SLM_FPC_RSV01]>; +defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>; defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteFAddX, [SLM_FPC_RSV1], 3>; @@ -193,9 +197,13 @@ defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>; def : WriteRes<WriteCvtF2FSt, [SLM_FPC_RSV01, SLM_MEC_RSV]>; // Vector integer operations. -def : WriteRes<WriteVecStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>; -def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; } -def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>; +def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; } +def : WriteRes<WriteVecMaskedLoad, [SLM_MEC_RSV]> { let Latency = 3; } +def : WriteRes<WriteVecMaskedLoadY, [SLM_MEC_RSV]> { let Latency = 3; } +def : WriteRes<WriteVecStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>; +def : WriteRes<WriteVecMaskedStore, [SLM_FPC_RSV01, SLM_MEC_RSV]>; +def : WriteRes<WriteVecMaskedStoreY, [SLM_FPC_RSV01, SLM_MEC_RSV]>; +def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>; defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteVecShiftX, [SLM_FPC_RSV0], 1>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index e9d265de3f8..4bd259775b8 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -188,9 +188,13 @@ def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{ } // Floating point operations -def : WriteRes<WriteFStore, [ZnAGU]>; -def : WriteRes<WriteFMove, [ZnFPU]>; -def : WriteRes<WriteFLoad, [ZnAGU]> { let Latency = 8; } +defm : X86WriteRes<WriteFLoad, [ZnAGU], 8, [1], 1>; +defm : X86WriteRes<WriteFMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,1], 1>; +defm : X86WriteRes<WriteFMaskedLoadY, [ZnAGU,ZnFPU01], 8, [1,2], 2>; +defm : X86WriteRes<WriteFStore, [ZnAGU], 1, [1,1], 1>; +defm : X86WriteRes<WriteFMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>; +defm : X86WriteRes<WriteFMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>; +defm : X86WriteRes<WriteFMove, [ZnFPU], 1, [1], 1>; defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU0], 3>; @@ -260,10 +264,14 @@ defm : ZnWriteResFpuPair<WriteFSqrt80, [ZnFPU3], 20, [20]>; def : WriteRes<WriteCvtF2FSt, [ZnFPU3, ZnAGU]>; // Vector integer operations which uses FPU units -def : WriteRes<WriteVecStore, [ZnAGU]>; -def : WriteRes<WriteVecMove, [ZnFPU]>; -def : WriteRes<WriteVecLoad, [ZnAGU]> { let Latency = 8; } -def : WriteRes<WriteEMMS, [ZnFPU]> { let Latency = 2; } +defm : X86WriteRes<WriteVecLoad, [ZnAGU], 8, [1], 1>; +defm : X86WriteRes<WriteVecMaskedLoad, [ZnAGU,ZnFPU01], 8, [1,2], 2>; +defm : X86WriteRes<WriteVecMaskedLoadY, [ZnAGU,ZnFPU01], 9, [1,3], 2>; +defm : X86WriteRes<WriteVecStore, [ZnAGU], 1, [1,1], 1>; +defm : X86WriteRes<WriteVecMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>; +defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>; +defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>; +defm : X86WriteRes<WriteEMMS, [ZnFPU], 2, [1], 1>; defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVecShiftX, [ZnFPU2], 1>; @@ -1030,11 +1038,8 @@ def : InstRW<[WriteMicrocoded], (instregex "MMX_MASKMOVQ(64)?")>; // MASKMOVDQU. def : InstRW<[WriteMicrocoded], (instregex "(V?)MASKMOVDQU(64)?")>; -// VPMASKMOVQ. +// VPMASKMOVD. // ymm -def : InstRW<[ZnWriteFPU01Op2],(instregex "VPMASKMOVQrm")>; -def : InstRW<[ZnWriteFPU01Op2Y],(instregex "VPMASKMOVQYrm")>; - def : InstRW<[WriteMicrocoded], (instregex "VPMASKMOVD(Y?)rm")>; // m, v,v. @@ -1168,32 +1173,6 @@ def ZnWriteVINSERT128Ld: SchedWriteRes<[ZnAGU,ZnFPU013]> { def : InstRW<[ZnWriteVINSERT128r], (instregex "VINSERTF128rr")>; def : InstRW<[ZnWriteVINSERT128Ld], (instregex "VINSERTF128rm")>; -// VMASKMOVP S/D. -// x,x,m. -def ZnWriteVMASKMOVPLd : SchedWriteRes<[ZnAGU, ZnFPU01]> { - let Latency = 8; -} -// y,y,m. -def ZnWriteVMASKMOVPLdY : SchedWriteRes<[ZnAGU, ZnFPU01]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1, 2]; -} -def ZnWriteVMASKMOVPm : SchedWriteRes<[ZnAGU, ZnFPU01]> { - let Latency = 4; -} -def : InstRW<[ZnWriteVMASKMOVPLd], (instregex "VMASKMOVP(S|D)rm")>; -def : InstRW<[ZnWriteVMASKMOVPLdY], (instregex "VMASKMOVP(S|D)Yrm")>; -def : InstRW<[ZnWriteVMASKMOVPm], (instregex "VMASKMOVP(S|D)mr")>; - -// m256,y,y. -def ZnWriteVMASKMOVPYmr : SchedWriteRes<[ZnAGU,ZnFPU01]> { - let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [1, 2]; -} -def : InstRW<[ZnWriteVMASKMOVPYmr], (instregex "VMASKMOVP(S|D)Ymr")>; - // VGATHERDPS. // x. def : InstRW<[WriteMicrocoded], (instregex "VGATHERDPSrm")>; diff --git a/llvm/test/CodeGen/X86/avx2-schedule.ll b/llvm/test/CodeGen/X86/avx2-schedule.ll index 193c3197291..60dc340b817 100644 --- a/llvm/test/CodeGen/X86/avx2-schedule.ll +++ b/llvm/test/CodeGen/X86/avx2-schedule.ll @@ -3384,8 +3384,8 @@ declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readn define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) { ; GENERIC-LABEL: test_pmaskmovd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [6:0.50] -; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [1:1.00] +; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:1.00] +; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] ; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3433,8 +3433,8 @@ declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind define <8 x i32> @test_pmaskmovd_ymm(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) { ; GENERIC-LABEL: test_pmaskmovd_ymm: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [6:0.50] -; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [1:1.00] +; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [9:1.00] +; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3482,8 +3482,8 @@ declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) { ; GENERIC-LABEL: test_pmaskmovq: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [6:0.50] -; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [1:1.00] +; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00] +; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00] ; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3531,8 +3531,8 @@ declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind define <4 x i64> @test_pmaskmovq_ymm(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) { ; GENERIC-LABEL: test_pmaskmovq_ymm: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [6:0.50] -; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [1:1.00] +; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.00] +; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00] ; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; |

