summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2010-09-29 22:42:35 +0000
committerEvan Cheng <evan.cheng@apple.com>2010-09-29 22:42:35 +0000
commit4a010fd1eac964a9d60fa87dfc1841dcb666335b (patch)
treef96f67bd6eb3406c7d39592d271b4c6d8455c652 /llvm/lib
parent2016f0eaacab149a4bfdba5f500e13d03f08846e (diff)
downloadbcm5719-llvm-4a010fd1eac964a9d60fa87dfc1841dcb666335b.tar.gz
bcm5719-llvm-4a010fd1eac964a9d60fa87dfc1841dcb666335b.zip
Model Cortex-a9 load to SUB, RSB, ADD, ADC, SBC, RSC, CMN, MVN, or CMP
pipeline forwarding path. llvm-svn: 115098
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/ScheduleDAGInstrs.cpp37
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp34
-rw-r--r--llvm/lib/Target/ARM/ARMInstrThumb2.td4
-rw-r--r--llvm/lib/Target/ARM/ARMSchedule.td1
-rw-r--r--llvm/lib/Target/ARM/ARMScheduleA8.td1
-rw-r--r--llvm/lib/Target/ARM/ARMScheduleA9.td69
6 files changed, 86 insertions, 60 deletions
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index da0b0562e12..3d2565dd18c 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -527,26 +527,23 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
MachineInstr *DefMI = Def->getInstr();
int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
if (DefIdx != -1) {
- int DefCycle = InstrItins->getOperandCycle(DefMI->getDesc().getSchedClass(),
- DefIdx);
- if (DefCycle >= 0) {
- MachineInstr *UseMI = Use->getInstr();
- const unsigned UseClass = UseMI->getDesc().getSchedClass();
-
- // For all uses of the register, calculate the maxmimum latency
- int Latency = -1;
- for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = UseMI->getOperand(i);
- if (!MO.isReg() || !MO.isUse())
- continue;
- unsigned MOReg = MO.getReg();
- if (MOReg != Reg)
- continue;
-
- int UseCycle = InstrItins->getOperandCycle(UseClass, i);
- if (UseCycle >= 0)
- Latency = std::max(Latency, DefCycle - UseCycle + 1);
- }
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ MachineInstr *UseMI = Use->getInstr();
+ unsigned UseClass = UseMI->getDesc().getSchedClass();
+
+ // For all uses of the register, calculate the maxmimum latency
+ int Latency = -1;
+ for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg != Reg)
+ continue;
+
+ int UseCycle = InstrItins->getOperandLatency(DefClass, DefIdx,
+ UseClass, i);
+ Latency = std::max(Latency, UseCycle);
// If we found a latency, then replace the existing dependence latency.
if (Latency >= 0)
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index fbf621d0bb4..23ff9c5807c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -457,24 +457,24 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
return;
unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
- if (Def->isMachineOpcode()) {
- const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
- if (DefIdx >= II.getNumDefs())
- return;
- int DefCycle = InstrItins->getOperandCycle(II.getSchedClass(), DefIdx);
- if (DefCycle < 0)
- return;
- int UseCycle = 1;
- if (Use->isMachineOpcode()) {
- const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
- UseCycle = InstrItins->getOperandCycle(UseClass, OpIdx);
- }
- if (UseCycle >= 0) {
- int Latency = DefCycle - UseCycle + 1;
- if (Latency >= 0)
- dep.setLatency(Latency);
- }
+ if (!Def->isMachineOpcode())
+ return;
+
+ const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
+ if (DefIdx >= II.getNumDefs())
+ return;
+
+ int Latency = 0;
+ if (!Use->isMachineOpcode()) {
+ Latency = InstrItins->getOperandCycle(II.getSchedClass(), DefIdx);
+ } else {
+ unsigned DefClass = II.getSchedClass();
+ unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
+ Latency = InstrItins->getOperandLatency(DefClass, DefIdx, UseClass, OpIdx);
}
+
+ if (Latency >= 0)
+ dep.setLatency(Latency);
}
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 5ca21aa9197..25eca70d38f 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -285,7 +285,7 @@ multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsir,
opc, "\t$dst, $rhs, $lhs",
[(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11101;
@@ -1698,7 +1698,7 @@ defm t2ORN : T2I_bin_irs<0b0011, "orn",
// Prefer over of t2EORri ra, rb, -1 because mvn has 16-bit version
let AddedComplexity = 1 in
defm t2MVN : T2I_un_irs <0b0011, "mvn",
- IIC_iMOVi, IIC_iMOVr, IIC_iMOVsi,
+ IIC_iMVNi, IIC_iMVNr, IIC_iMVNsi,
UnOpFrag<(not node:$Src)>, 1, 1>;
diff --git a/llvm/lib/Target/ARM/ARMSchedule.td b/llvm/lib/Target/ARM/ARMSchedule.td
index 133a81b195d..00d148b8eda 100644
--- a/llvm/lib/Target/ARM/ARMSchedule.td
+++ b/llvm/lib/Target/ARM/ARMSchedule.td
@@ -14,6 +14,7 @@ def IIC_iALUx : InstrItinClass;
def IIC_iALUi : InstrItinClass;
def IIC_iALUr : InstrItinClass;
def IIC_iALUsi : InstrItinClass;
+def IIC_iALUsir : InstrItinClass;
def IIC_iALUsr : InstrItinClass;
def IIC_iBITi : InstrItinClass;
def IIC_iBITr : InstrItinClass;
diff --git a/llvm/lib/Target/ARM/ARMScheduleA8.td b/llvm/lib/Target/ARM/ARMScheduleA8.td
index e6b2beae038..8962ec93efa 100644
--- a/llvm/lib/Target/ARM/ARMScheduleA8.td
+++ b/llvm/lib/Target/ARM/ARMScheduleA8.td
@@ -36,6 +36,7 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>,
InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
//
// Bitwise Instructions that produce a result
diff --git a/llvm/lib/Target/ARM/ARMScheduleA9.td b/llvm/lib/Target/ARM/ARMScheduleA9.td
index 14197c824da..1f4b8d1ab07 100644
--- a/llvm/lib/Target/ARM/ARMScheduleA9.td
+++ b/llvm/lib/Target/ARM/ARMScheduleA9.td
@@ -23,10 +23,14 @@ def A9_NPipe : FuncUnit; // NEON ALU/MUL pipe
def A9_DRegsVFP: FuncUnit; // FP register set, VFP side
def A9_DRegsN : FuncUnit; // FP register set, NEON side
+// Bypasses
+def A9_LdBypass : Bypass;
+
// Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
//
def CortexA9Itineraries : ProcessorItineraries<
- [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [], [
+ [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1],
+ [A9_LdBypass], [
// Two fully-pipelined integer ALU pipelines
//
@@ -39,19 +43,30 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
//
// MVN instructions
- InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
- InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
- InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
- InstrItinData<IIC_iMVNsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iMVNi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
+ [1]>,
+ InstrItinData<IIC_iMVNr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
+ [1, 1], [NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iMVNsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
+ [1, 1]>,
+ InstrItinData<IIC_iMVNsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
+ [2, 2, 1]>,
//
// No operand cycles
InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
//
// Binary Instructions that produce a result
- InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
- InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
- InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
+ [2, 2], [NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
+ [2, 2, 2], [NoBypass, A9_LdBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
+ [2, 2, 1], [NoBypass, A9_LdBypass, NoBypass]>,
+ InstrItinData<IIC_iALUsir,[InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
+ [2, 1, 2], [NoBypass, NoBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>],
+ [2, 2, 1, 1],
+ [NoBypass, A9_LdBypass, NoBypass, NoBypass]>,
//
// Bitwise Instructions that produce a result
InstrItinData<IIC_iBITi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
@@ -69,10 +84,14 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_iEXTAsr,[InstrStage<2, [A9_Pipe0, A9_Pipe1]>],[3, 1, 1, 1]>,
//
// Compare instructions
- InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
- InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
+ [2], [A9_LdBypass]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
+ [2, 2], [A9_LdBypass, A9_LdBypass]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>],
+ [2, 1], [A9_LdBypass, NoBypass]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>],
+ [2, 1, 1], [A9_LdBypass, NoBypass, NoBypass]>,
//
// Test instructions
InstrItinData<IIC_iTSTi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
@@ -105,31 +124,38 @@ def CortexA9Itineraries : ProcessorItineraries<
//
// Immediate offset
InstrItinData<IIC_iLoadi , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe]>], [3, 1]>,
+ InstrStage<1, [A9_LSPipe]>],
+ [3, 1], [A9_LdBypass]>,
//
// Register offset
InstrItinData<IIC_iLoadr , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
+ InstrStage<1, [A9_LSPipe]>],
+ [3, 1, 1], [A9_LdBypass]>,
//
// Scaled register offset
InstrItinData<IIC_iLoadsi , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
+ InstrStage<2, [A9_LSPipe]>],
+ [4, 1, 1], [A9_LdBypass]>,
//
// Immediate offset with update
InstrItinData<IIC_iLoadiu , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
+ InstrStage<2, [A9_LSPipe]>],
+ [3, 2, 1], [A9_LdBypass]>,
//
// Register offset with update
InstrItinData<IIC_iLoadru , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
+ InstrStage<2, [A9_LSPipe]>],
+ [3, 2, 1, 1], [A9_LdBypass]>,
//
// Scaled register offset with update
InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
+ InstrStage<2, [A9_LSPipe]>],
+ [4, 3, 1, 1], [A9_LdBypass]>,
//
// Load multiple
InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe]>]>,
+ InstrStage<1, [A9_LSPipe]>],
+ [3], [A9_LdBypass]>,
//
// Load multiple plus branch
@@ -141,7 +167,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// iLoadi + iALUr for t2LDRpci_pic.
InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_LSPipe]>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [4, 1]>,
+ InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
+ [2, 1]>,
// Integer store pipeline
///
OpenPOWER on IntegriCloud