diff options
| author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2018-03-28 10:49:33 +0000 |
|---|---|---|
| committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2018-03-28 10:49:33 +0000 |
| commit | 010924e35cedc390f7f2df31928ac9b19f754d6c (patch) | |
| tree | 7bba5d47976ea008b1af235376c8c75ff01f05aa /llvm/lib | |
| parent | 5a841234904b5138d1ce0bc7ad2ee69ea0d25792 (diff) | |
| download | bcm5719-llvm-010924e35cedc390f7f2df31928ac9b19f754d6c.tar.gz bcm5719-llvm-010924e35cedc390f7f2df31928ac9b19f754d6c.zip | |
[X86][BtVer2] Fix the number of micro opcodes for a bunch of YMM instructions.
The Jaguar backend natively supports 128-bit data types. Operations on YMM
registers are split into two COPs (complex operations). Each COP consumes a slot
in the dispatch group, and in the reorder buffer.
The scheduling model for Jaguar should therefore set `let NumMicroOps = 2` on
the scheduling classes used by those instructions.
This was found when testing AVX code for BtVer2 using llvm-mca.
llvm-svn: 328694
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 12 |
1 file changed, 12 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 4d8838372fe..1b4d1ad73fa 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -547,6 +547,7 @@ def : InstRW<[JWriteVDPPSYLd, ReadAfterLd], (instrs VDPPSYrmi)>; def JWriteFAddY: SchedWriteRes<[JFPU0, JFPA]> { let Latency = 3; let ResourceCycles = [2, 2]; + let NumMicroOps = 2; } def : InstRW<[JWriteFAddY], (instrs VADDPDYrr, VADDPSYrr, VSUBPDYrr, VSUBPSYrr, @@ -555,6 +556,7 @@ def : InstRW<[JWriteFAddY], (instrs VADDPDYrr, VADDPSYrr, def JWriteFAddYLd: SchedWriteRes<[JLAGU, JFPU0, JFPA]> { let Latency = 8; let ResourceCycles = [2, 2, 2]; + let NumMicroOps = 2; } def : InstRW<[JWriteFAddYLd, ReadAfterLd], (instrs VADDPDYrm, VADDPSYrm, VSUBPDYrm, VSUBPSYrm, @@ -563,36 +565,42 @@ def : InstRW<[JWriteFAddYLd, ReadAfterLd], (instrs VADDPDYrm, VADDPSYrm, def JWriteFDivY: SchedWriteRes<[JFPU1, JFPM]> { let Latency = 38; let ResourceCycles = [2, 38]; + let NumMicroOps = 2; } def : InstRW<[JWriteFDivY], (instrs VDIVPDYrr, VDIVPSYrr)>; def JWriteFDivYLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { let Latency = 43; let ResourceCycles = [2, 2, 38]; + let NumMicroOps = 2; } def : InstRW<[JWriteFDivYLd, ReadAfterLd], (instrs VDIVPDYrm, VDIVPSYrm)>; def JWriteVMULYPD: SchedWriteRes<[JFPU1, JFPM]> { let Latency = 4; let ResourceCycles = [2, 4]; + let NumMicroOps = 2; } def : InstRW<[JWriteVMULYPD], (instrs VMULPDYrr)>; def JWriteVMULYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { let Latency = 9; let ResourceCycles = [2, 2, 4]; + let NumMicroOps = 2; } def : InstRW<[JWriteVMULYPDLd, ReadAfterLd], (instrs VMULPDYrm)>; def JWriteVMULYPS: SchedWriteRes<[JFPU1, JFPM]> { let Latency = 2; let ResourceCycles = [2, 2]; + let NumMicroOps = 2; } def : InstRW<[JWriteVMULYPS], (instrs VMULPSYrr, VRCPPSYr, VRSQRTPSYr)>; def JWriteVMULYPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { let Latency = 7; let ResourceCycles = [2, 2, 2]; + let NumMicroOps = 2; } 
def : InstRW<[JWriteVMULYPSLd, ReadAfterLd], (instrs VMULPSYrm, VRCPPSYm, VRSQRTPSYm)>; @@ -611,6 +619,7 @@ def : InstRW<[JWriteVMULPDLd], (instrs MULPDrm, MULSDrm, VMULPDrm, VMULSDrm)>; def JWriteVCVTY: SchedWriteRes<[JFPU1, JSTC]> { let Latency = 3; let ResourceCycles = [2, 2]; + let NumMicroOps = 2; } def : InstRW<[JWriteVCVTY], (instrs VCVTDQ2PDYrr, VCVTDQ2PSYrr, VCVTPS2DQYrr, VCVTTPS2DQYrr, @@ -619,6 +628,7 @@ def : InstRW<[JWriteVCVTY], (instrs VCVTDQ2PDYrr, VCVTDQ2PSYrr, def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> { let Latency = 8; let ResourceCycles = [2, 2, 2]; + let NumMicroOps = 2; } def : InstRW<[JWriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm, VCVTPS2DQYrm, VCVTTPS2DQYrm, @@ -834,12 +844,14 @@ def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>; def JWriteVSQRTYPS: SchedWriteRes<[JFPU1, JFPM]> { let Latency = 42; let ResourceCycles = [2, 42]; + let NumMicroOps = 2; } def : InstRW<[JWriteVSQRTYPS], (instrs VSQRTPSYr)>; def JWriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> { let Latency = 47; let ResourceCycles = [2, 2, 42]; + let NumMicroOps = 2; } def : InstRW<[JWriteVSQRTYPSLd], (instrs VSQRTPSYm)>; |

