diff options
Diffstat (limited to 'llvm/lib/Target/ARM/ARMScheduleA9.td')
-rw-r--r-- | llvm/lib/Target/ARM/ARMScheduleA9.td | 220 |
1 files changed, 175 insertions, 45 deletions
diff --git a/llvm/lib/Target/ARM/ARMScheduleA9.td b/llvm/lib/Target/ARM/ARMScheduleA9.td index 27745e65300..c199ef7f2b2 100644 --- a/llvm/lib/Target/ARM/ARMScheduleA9.td +++ b/llvm/lib/Target/ARM/ARMScheduleA9.td @@ -24,6 +24,8 @@ def A9_ALU1 : FuncUnit; // ALU pipeline 1 def A9_AGU : FuncUnit; // Address generation unit for ld / st def A9_NPipe : FuncUnit; // NEON pipeline def A9_MUX0 : FuncUnit; // AGU + NEON/FPU multiplexer +def A9_LS0 : FuncUnit; // L/S Units, 32-bit per unit. Fake FU to limit l/s. +def A9_LS1 : FuncUnit; // L/S Units, 32-bit per unit. def A9_DRegsVFP: FuncUnit; // FP register set, VFP side def A9_DRegsN : FuncUnit; // FP register set, NEON side @@ -32,7 +34,7 @@ def A9_LdBypass : Bypass; def CortexA9Itineraries : ProcessorItineraries< [A9_Issue0, A9_Issue1, A9_Branch, A9_ALU0, A9_ALU1, A9_AGU, A9_NPipe, A9_MUX0, - A9_DRegsVFP, A9_DRegsN], + A9_LS0, A9_LS1, A9_DRegsVFP, A9_DRegsN], [A9_LdBypass], [ // Two fully-pipelined integer ALU pipelines @@ -172,87 +174,105 @@ def CortexA9Itineraries : ProcessorItineraries< // Immediate offset InstrItinData<IIC_iLoad_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_AGU]>], + InstrStage<1, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [3, 1], [A9_LdBypass]>, InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [4, 1], [A9_LdBypass]>, // FIXME: If address is 64-bit aligned, AGU cycles is 1. 
InstrItinData<IIC_iLoad_d_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [3, 3, 1], [A9_LdBypass]>, // // Register offset InstrItinData<IIC_iLoad_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_AGU]>], + InstrStage<1, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [3, 1, 1], [A9_LdBypass]>, InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [4, 1, 1], [A9_LdBypass]>, InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [3, 3, 1, 1], [A9_LdBypass]>, // // Scaled register offset InstrItinData<IIC_iLoad_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_AGU]>], + InstrStage<1, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [4, 1, 1], [A9_LdBypass]>, InstrItinData<IIC_iLoad_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [5, 1, 1], [A9_LdBypass]>, // // Immediate offset with update InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_AGU]>], + InstrStage<1, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [3, 2, 1], [A9_LdBypass]>, InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [4, 3, 1], [A9_LdBypass]>, // // Register offset with update InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, 
[A9_AGU]>], + InstrStage<1, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [3, 2, 1, 1], [A9_LdBypass]>, InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [4, 3, 1, 1], [A9_LdBypass]>, InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [3, 3, 1, 1], [A9_LdBypass]>, // // Scaled register offset with update InstrItinData<IIC_iLoad_siu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_AGU]>], + InstrStage<1, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [4, 3, 1, 1], [A9_LdBypass]>, InstrItinData<IIC_iLoad_bh_siu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [5, 4, 1, 1], [A9_LdBypass]>, // // Load multiple, def is the 5th operand. + // FIXME: This assumes 3 to 4 registers. InstrItinData<IIC_iLoad_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], + InstrStage<2, [A9_AGU]>, + InstrStage<2, [A9_LS0, A9_LS1]>], [1, 1, 1, 1, 3], [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, // // Load multiple + update, defs are the 1st and 5th operands. 
InstrItinData<IIC_iLoad_mu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], + InstrStage<2, [A9_AGU]>, + InstrStage<2, [A9_LS0, A9_LS1]>], [2, 1, 1, 1, 3], [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, // @@ -260,6 +280,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_iLoad_mBr, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_AGU]>, + InstrStage<2, [A9_LS0, A9_LS1]>, InstrStage<1, [A9_Branch]>], [1, 2, 1, 1, 3], [NoBypass, NoBypass, NoBypass, NoBypass, A9_LdBypass]>, @@ -267,7 +288,8 @@ def CortexA9Itineraries : ProcessorItineraries< // Pop, def is the 3rd operand. InstrItinData<IIC_iPop , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], + InstrStage<2, [A9_AGU]>, + InstrStage<2, [A9_LS0, A9_LS1]>], [1, 1, 3], [NoBypass, NoBypass, A9_LdBypass]>, // @@ -275,6 +297,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_iPop_Br, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<2, [A9_AGU]>, + InstrStage<2, [A9_LS0, A9_LS1]>, InstrStage<1, [A9_Branch]>], [1, 1, 3], [NoBypass, NoBypass, A9_LdBypass]>, @@ -284,6 +307,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>, InstrStage<1, [A9_ALU0, A9_ALU1]>], [2, 1]>, @@ -292,75 +316,92 @@ def CortexA9Itineraries : ProcessorItineraries< // Immediate offset InstrItinData<IIC_iStore_i , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_AGU]>], [1, 1]>, + InstrStage<1, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>, InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], [1, 1]>, + InstrStage<2, [A9_AGU]>, + InstrStage<1, 
[A9_LS0, A9_LS1]>], [1, 1]>, // FIXME: If address is 64-bit aligned, AGU cycles is 1. InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], [1, 1]>, + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1]>, // // Register offset InstrItinData<IIC_iStore_r , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_AGU]>], [1, 1, 1]>, + InstrStage<1, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>, InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], [1, 1, 1]>, + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>, InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], [1, 1, 1]>, + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>, // // Scaled register offset InstrItinData<IIC_iStore_si , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_AGU]>], [1, 1, 1]>, + InstrStage<1, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>, InstrItinData<IIC_iStore_bh_si,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], [1, 1, 1]>, + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [1, 1, 1]>, // // Immediate offset with update InstrItinData<IIC_iStore_iu , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_AGU]>], [2, 1, 1]>, + InstrStage<1, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [2, 1, 1]>, InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], [3, 1, 1]>, + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [3, 1, 1]>, // // Register offset with update InstrItinData<IIC_iStore_ru , [InstrStage<1, [A9_Issue0, 
A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_AGU]>], + InstrStage<1, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [2, 1, 1, 1]>, InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [3, 1, 1, 1]>, InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [3, 1, 1, 1]>, // // Scaled register offset with update InstrItinData<IIC_iStore_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_AGU]>], + InstrStage<1, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [2, 1, 1, 1]>, InstrItinData<IIC_iStore_bh_siu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<2, [A9_AGU]>], + InstrStage<2, [A9_AGU]>, + InstrStage<1, [A9_LS0, A9_LS1]>], [3, 1, 1, 1]>, // // Store multiple InstrItinData<IIC_iStore_m , [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_AGU]>]>, + InstrStage<1, [A9_AGU]>, + InstrStage<2, [A9_LS0, A9_LS1]>]>, // // Store multiple + update InstrItinData<IIC_iStore_mu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_AGU]>], [2]>, + InstrStage<1, [A9_AGU]>, + InstrStage<2, [A9_LS0, A9_LS1]>], [2]>, // Branch // @@ -672,24 +713,113 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_NPipe]>], [2, 1, 1, 1]>, // NEON - // Issue through integer pipeline, and execute in NEON unit. // VLD1 - // FIXME: We don't model this instruction properly + // FIXME: Conservatively assume insufficient alignment. 
InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_NPipe]>]>, + InstrStage<2, [A9_NPipe]>], + [2, 1]>, + // VLD1x2 + InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_NPipe]>], + [2, 2, 1]>, + // VLD1x3 + InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [2, 2, 3, 1]>, + // VLD1x4 + InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [2, 2, 3, 3, 1]>, + // VLD1u + InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_NPipe]>], + [2, 2, 1]>, + // VLD1x2u + InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_NPipe]>], + [2, 2, 2, 1]>, + // VLD1x3u + InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [2, 2, 3, 2, 1]>, + // VLD1x4u + InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, 
+ InstrStage<3, [A9_NPipe]>], + [2, 2, 3, 3, 2, 1]>, // // VLD2 - // FIXME: We don't model this instruction properly InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>, - // Extra latency cycles since wbck is 6 cycles - InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_NPipe]>], - [2, 2, 1]>, + InstrStage<2, [A9_NPipe]>], + [3, 3, 1]>, + // + // VLD2x2 + InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [3, 4, 3, 4, 1]>, + // + // VLD2ln + InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [4, 4, 1, 1, 1, 1]>, + // + // VLD2u + InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_DRegsN], 0, Required>, + // Extra latency cycles since wbck is 7 cycles + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<2, [A9_NPipe]>], + [3, 3, 2, 1, 1, 1]>, + // + // VLD2x2u + InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [3, 4, 3, 4, 2, 1]>, + // + // VLD2lnu + InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_DRegsN], 0, Required>, + InstrStage<9, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_Issue0, A9_Issue1], 0>, + InstrStage<1, [A9_MUX0], 0>, + InstrStage<3, [A9_NPipe]>], + [4, 4, 2, 1, 1, 1, 1, 1]>, // // VLD3 // FIXME: We don't model this instruction properly |