| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-03-19 14:46:07 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-03-19 14:46:07 +0000 |
| commit | 30c38c38497763d5660fde146e1185c0dbb082d5 (patch) | |
| tree | 4ab35d4a334b1c9129e18e7ad03fdaa2700bd28e /llvm/lib/Target/X86/X86ScheduleZnver1.td | |
| parent | 10fe9bc79eb38319cf6faf7fe4ffe131b23419af (diff) | |
| download | bcm5719-llvm-30c38c38497763d5660fde146e1185c0dbb082d5.tar.gz bcm5719-llvm-30c38c38497763d5660fde146e1185c0dbb082d5.zip | |
[X86] Generalize schedule classes to support multiple stages
Currently the WriteResPair-style multi-classes take a single pipeline stage and latency; this patch generalizes them so that complex schedules are easier to create, with ResourceCycles and NumMicroOps overridable from their defaults.
This has already been done for the Jaguar scheduler, where it removed a number of custom schedule classes; adding it to the other x86 targets will keep things much tidier as we add further classes in the future to replace the many remaining custom cases.
I've converted some instructions, but a lot of the models will need cleanup after the patch has been committed: memory latencies are not consistent, the class is not always used where it could replace some or all of the custom definitions, and so on. I'd prefer to keep this as close to NFC as possible so that later patches can be smaller and target-specific.
Differential Revision: https://reviews.llvm.org/D44612
llvm-svn: 327855
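As a usage sketch (an editor's illustration, not part of the commit): with the generalized signature a model can keep the old single-port form and, where needed, spell out resource occupancy and uop counts directly in the defm. The write types and units below come from the Znver1 model in this patch, but the cycle/uop numbers in the second line are made up for illustration.

```tablegen
// Default Res = [1], UOps = 1: equivalent to the old single-port form.
defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
// Explicit override (hypothetical numbers): occupy ZnFPU3 for 20 cycles as a
// single uop, something that previously needed a hand-written WriteRes.
defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20, [20], 1>;
```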
Diffstat (limited to 'llvm/lib/Target/X86/X86ScheduleZnver1.td')
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 105 |
1 file changed, 55 insertions(+), 50 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index 4ad05f3a16e..c43dae41c7e 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -99,30 +99,41 @@ def : ReadAdvance<ReadAfterLd, 4>;
 // b. addpd
 // This multiclass is for folded loads for integer units.
 multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW,
-                          ProcResourceKind ExePort,
-                          int Lat> {
+                          list<ProcResourceKind> ExePorts,
+                          int Lat, list<int> Res = [1], int UOps = 1> {
   // Register variant takes 1-cycle on Execution Port.
-  def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+  def : WriteRes<SchedRW, ExePorts> {
+    let Latency = Lat;
+    let ResourceCycles = Res;
+    let NumMicroOps = UOps;
+  }
 
   // Memory variant also uses a cycle on ZnAGU
   // adds 4 cycles to the latency.
-  def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> {
-    let NumMicroOps = 2;
-    let Latency = !add(Lat, 4);
+  def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
+    let Latency = !add(Lat, 4);
+    let ResourceCycles = !listconcat([1], Res);
+    let NumMicroOps = !add(UOps, 1);
   }
 }
 
 // This multiclass is for folded loads for floating point units.
 multiclass ZnWriteResFpuPair<X86FoldableSchedWrite SchedRW,
-                             ProcResourceKind ExePort,
-                             int Lat> {
+                             list<ProcResourceKind> ExePorts,
+                             int Lat, list<int> Res = [1], int UOps = 1> {
   // Register variant takes 1-cycle on Execution Port.
-  def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+  def : WriteRes<SchedRW, ExePorts> {
+    let Latency = Lat;
+    let ResourceCycles = Res;
+    let NumMicroOps = UOps;
+  }
 
   // Memory variant also uses a cycle on ZnAGU
   // adds 7 cycles to the latency.
-  def : WriteRes<SchedRW.Folded, [ZnAGU, ExePort]> {
-    let Latency = !add(Lat, 7);
+  def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> {
+    let Latency = !add(Lat, 7);
+    let ResourceCycles = !listconcat([1], Res);
+    let NumMicroOps = UOps;
   }
 }
 
@@ -136,9 +147,10 @@ def : WriteRes<WriteLoad, [ZnAGU]> { let Latency = 8; }
 def : WriteRes<WriteZero, []>;
 def : WriteRes<WriteLEA, [ZnALU]>;
-defm : ZnWriteResPair<WriteALU, ZnALU, 1>;
-defm : ZnWriteResPair<WriteShift, ZnALU, 1>;
-defm : ZnWriteResPair<WriteJump, ZnALU, 1>;
+defm : ZnWriteResPair<WriteALU, [ZnALU], 1>;
+defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>;
+defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
+defm : ZnWriteResPair<WriteJump, [ZnALU], 1>;
 
 // Treat misc copies as a move.
 def : InstRW<[WriteMove], (instrs COPY)>;
@@ -154,67 +166,60 @@ def : WriteRes<WriteIDivLd, [ZnALU2, ZnAGU, ZnDivider]> {
   let ResourceCycles = [1, 4, 41];
 }
 
-// IMUL
+// IMULH
 def : WriteRes<WriteIMulH, [ZnALU1, ZnMultiplier]>{
   let Latency = 4;
 }
 
-def : WriteRes<WriteIMul, [ZnALU1, ZnMultiplier]> {
-  let Latency = 4;
-}
-
-def : WriteRes<WriteIMulLd,[ZnALU1, ZnMultiplier]> {
-  let Latency = 8;
-}
 
 // Floating point operations
 def : WriteRes<WriteFStore, [ZnAGU]>;
 def : WriteRes<WriteFMove, [ZnFPU]>;
 def : WriteRes<WriteFLoad, [ZnAGU]> { let Latency = 8; }
-defm : ZnWriteResFpuPair<WriteFHAdd, ZnFPU0, 3>;
-defm : ZnWriteResFpuPair<WriteFAdd, ZnFPU0, 3>;
-defm : ZnWriteResFpuPair<WriteFBlend, ZnFPU01, 1>;
-defm : ZnWriteResFpuPair<WriteFVarBlend, ZnFPU01, 1>;
-defm : ZnWriteResFpuPair<WriteVarBlend, ZnFPU0, 1>;
-defm : ZnWriteResFpuPair<WriteCvtI2F, ZnFPU3, 5>;
-defm : ZnWriteResFpuPair<WriteCvtF2F, ZnFPU3, 5>;
-defm : ZnWriteResFpuPair<WriteCvtF2I, ZnFPU3, 5>;
-defm : ZnWriteResFpuPair<WriteFDiv, ZnFPU3, 15>;
-defm : ZnWriteResFpuPair<WriteFShuffle, ZnFPU12, 1>;
-defm : ZnWriteResFpuPair<WriteFMul, ZnFPU0, 5>;
-defm : ZnWriteResFpuPair<WriteFMA, ZnFPU03, 5>;
-defm : ZnWriteResFpuPair<WriteFRcp, ZnFPU01, 5>;
-defm : ZnWriteResFpuPair<WriteFRsqrt, ZnFPU01, 5>;
-defm : ZnWriteResFpuPair<WriteFSqrt, ZnFPU3, 20>;
+defm : ZnWriteResFpuPair<WriteFHAdd, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>;
+defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>;
+defm : ZnWriteResFpuPair<WriteFVarBlend, [ZnFPU01], 1>;
+defm : ZnWriteResFpuPair<WriteVarBlend, [ZnFPU0], 1>;
+defm : ZnWriteResFpuPair<WriteCvtI2F, [ZnFPU3], 5>;
+defm : ZnWriteResFpuPair<WriteCvtF2F, [ZnFPU3], 5>;
+defm : ZnWriteResFpuPair<WriteCvtF2I, [ZnFPU3], 5>;
+defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>;
+defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>;
+defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU0], 5>;
+defm : ZnWriteResFpuPair<WriteFMA, [ZnFPU03], 5>;
+defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>;
+defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU01], 5>;
+defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20>;
 
 // Vector integer operations which uses FPU units
 def : WriteRes<WriteVecStore, [ZnAGU]>;
 def : WriteRes<WriteVecMove, [ZnFPU]>;
 def : WriteRes<WriteVecLoad, [ZnAGU]> { let Latency = 8; }
-defm : ZnWriteResFpuPair<WriteVecShift, ZnFPU, 1>;
-defm : ZnWriteResFpuPair<WriteVecLogic, ZnFPU, 1>;
-defm : ZnWriteResFpuPair<WritePHAdd, ZnFPU, 1>;
-defm : ZnWriteResFpuPair<WriteVecALU, ZnFPU, 1>;
-defm : ZnWriteResFpuPair<WriteVecIMul, ZnFPU0, 4>;
-defm : ZnWriteResFpuPair<WriteShuffle, ZnFPU, 1>;
-defm : ZnWriteResFpuPair<WriteBlend, ZnFPU01, 1>;
-defm : ZnWriteResFpuPair<WriteShuffle256, ZnFPU, 2>;
+defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WritePHAdd, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>;
+defm : ZnWriteResFpuPair<WriteShuffle, [ZnFPU], 1>;
+defm : ZnWriteResFpuPair<WriteBlend, [ZnFPU01], 1>;
+defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>;
 
 // Vector Shift Operations
-defm : ZnWriteResFpuPair<WriteVarVecShift, ZnFPU12, 1>;
+defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>;
 
 // AES Instructions.
-defm : ZnWriteResFpuPair<WriteAESDecEnc, ZnFPU01, 4>;
-defm : ZnWriteResFpuPair<WriteAESIMC, ZnFPU01, 4>;
-defm : ZnWriteResFpuPair<WriteAESKeyGen, ZnFPU01, 4>;
+defm : ZnWriteResFpuPair<WriteAESDecEnc, [ZnFPU01], 4>;
+defm : ZnWriteResFpuPair<WriteAESIMC, [ZnFPU01], 4>;
+defm : ZnWriteResFpuPair<WriteAESKeyGen, [ZnFPU01], 4>;
 
 def : WriteRes<WriteFence, [ZnAGU]>;
 def : WriteRes<WriteNop, []>;
 
 // Following instructions with latency=100 are microcoded.
 // We set long latency so as to block the entire pipeline.
-defm : ZnWriteResFpuPair<WriteFShuffle256, ZnFPU, 100>;
+defm : ZnWriteResFpuPair<WriteFShuffle256, [ZnFPU], 100>;
 
 //Microcoded Instructions
 let Latency = 100 in {
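For reference, here is a rough sketch (an editor's illustration, not text from the patch) of the two records the updated ZnWriteResPair produces for `defm : ZnWriteResPair<WriteALU, [ZnALU], 1>;`, using the default Res = [1] and UOps = 1. The folded write type is reached through SchedRW.Folded, which for WriteALU is assumed here to be the WriteALULd write defined in X86Schedule.td.

```tablegen
// Register variant: 1-cycle latency on ZnALU, one uop.
def : WriteRes<WriteALU, [ZnALU]> {
  let Latency = 1;
  let ResourceCycles = [1];
  let NumMicroOps = 1;
}

// Memory-folded variant: ZnAGU is prepended, the load adds 4 cycles and 1 uop.
def : WriteRes<WriteALULd, [ZnAGU, ZnALU]> {
  let Latency = 5;               // !add(1, 4)
  let ResourceCycles = [1, 1];   // !listconcat([1], [1])
  let NumMicroOps = 2;           // !add(1, 1)
}
```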

