summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2018-03-28 10:49:33 +0000
committer: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2018-03-28 10:49:33 +0000
commit: 010924e35cedc390f7f2df31928ac9b19f754d6c (patch)
tree: 7bba5d47976ea008b1af235376c8c75ff01f05aa /llvm/lib
parent: 5a841234904b5138d1ce0bc7ad2ee69ea0d25792 (diff)
download: bcm5719-llvm-010924e35cedc390f7f2df31928ac9b19f754d6c.tar.gz
download: bcm5719-llvm-010924e35cedc390f7f2df31928ac9b19f754d6c.zip
[X86][BtVer2] Fix the number of micro opcodes for a bunch of YMM instructions.
The Jaguar backend natively supports 128-bit data types, so operations on 256-bit YMM registers are split into two COPs (complex operations). Each COP consumes a slot in the dispatch group and a slot in the reorder buffer. The scheduling model for Jaguar should therefore mark those instructions with `let NumMicroOps = 2`. This was found when testing AVX code for BtVer2 using llvm-mca.

llvm-svn: 328694
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86ScheduleBtVer2.td | 12
1 file changed, 12 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 4d8838372fe..1b4d1ad73fa 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -547,6 +547,7 @@ def : InstRW<[JWriteVDPPSYLd, ReadAfterLd], (instrs VDPPSYrmi)>;
def JWriteFAddY: SchedWriteRes<[JFPU0, JFPA]> {
let Latency = 3;
let ResourceCycles = [2, 2];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteFAddY], (instrs VADDPDYrr, VADDPSYrr,
VSUBPDYrr, VSUBPSYrr,
@@ -555,6 +556,7 @@ def : InstRW<[JWriteFAddY], (instrs VADDPDYrr, VADDPSYrr,
def JWriteFAddYLd: SchedWriteRes<[JLAGU, JFPU0, JFPA]> {
let Latency = 8;
let ResourceCycles = [2, 2, 2];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteFAddYLd, ReadAfterLd], (instrs VADDPDYrm, VADDPSYrm,
VSUBPDYrm, VSUBPSYrm,
@@ -563,36 +565,42 @@ def : InstRW<[JWriteFAddYLd, ReadAfterLd], (instrs VADDPDYrm, VADDPSYrm,
def JWriteFDivY: SchedWriteRes<[JFPU1, JFPM]> {
let Latency = 38;
let ResourceCycles = [2, 38];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteFDivY], (instrs VDIVPDYrr, VDIVPSYrr)>;
def JWriteFDivYLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
let Latency = 43;
let ResourceCycles = [2, 2, 38];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteFDivYLd, ReadAfterLd], (instrs VDIVPDYrm, VDIVPSYrm)>;
def JWriteVMULYPD: SchedWriteRes<[JFPU1, JFPM]> {
let Latency = 4;
let ResourceCycles = [2, 4];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteVMULYPD], (instrs VMULPDYrr)>;
def JWriteVMULYPDLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
let Latency = 9;
let ResourceCycles = [2, 2, 4];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteVMULYPDLd, ReadAfterLd], (instrs VMULPDYrm)>;
def JWriteVMULYPS: SchedWriteRes<[JFPU1, JFPM]> {
let Latency = 2;
let ResourceCycles = [2, 2];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteVMULYPS], (instrs VMULPSYrr, VRCPPSYr, VRSQRTPSYr)>;
def JWriteVMULYPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
let Latency = 7;
let ResourceCycles = [2, 2, 2];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteVMULYPSLd, ReadAfterLd], (instrs VMULPSYrm, VRCPPSYm, VRSQRTPSYm)>;
@@ -611,6 +619,7 @@ def : InstRW<[JWriteVMULPDLd], (instrs MULPDrm, MULSDrm, VMULPDrm, VMULSDrm)>;
def JWriteVCVTY: SchedWriteRes<[JFPU1, JSTC]> {
let Latency = 3;
let ResourceCycles = [2, 2];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteVCVTY], (instrs VCVTDQ2PDYrr, VCVTDQ2PSYrr,
VCVTPS2DQYrr, VCVTTPS2DQYrr,
@@ -619,6 +628,7 @@ def : InstRW<[JWriteVCVTY], (instrs VCVTDQ2PDYrr, VCVTDQ2PSYrr,
def JWriteVCVTYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC]> {
let Latency = 8;
let ResourceCycles = [2, 2, 2];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteVCVTYLd, ReadAfterLd], (instrs VCVTDQ2PDYrm, VCVTDQ2PSYrm,
VCVTPS2DQYrm, VCVTTPS2DQYrm,
@@ -834,12 +844,14 @@ def : InstRW<[JWriteVSQRTYPDLd], (instrs VSQRTPDYm)>;
def JWriteVSQRTYPS: SchedWriteRes<[JFPU1, JFPM]> {
let Latency = 42;
let ResourceCycles = [2, 42];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteVSQRTYPS], (instrs VSQRTPSYr)>;
def JWriteVSQRTYPSLd: SchedWriteRes<[JLAGU, JFPU1, JFPM]> {
let Latency = 47;
let ResourceCycles = [2, 2, 42];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteVSQRTYPSLd], (instrs VSQRTPSYm)>;
OpenPOWER on IntegriCloud