author    David Goodwin <david_goodwin@apple.com>  2009-08-11 22:38:43 +0000
committer David Goodwin <david_goodwin@apple.com>  2009-08-11 22:38:43 +0000
commit    fd5defed1dff8eaeeb145eb3c507cc173267c0b6 (patch)
tree      f878caa2682434dbccdbb49e528d0d06d737b694 /llvm/lib/Target
parent    854d7dec5af72886ef389dd3298a4d14070e220a (diff)
download  bcm5719-llvm-fd5defed1dff8eaeeb145eb3c507cc173267c0b6.tar.gz
          bcm5719-llvm-fd5defed1dff8eaeeb145eb3c507cc173267c0b6.zip
Allow a zero cycle stage to reserve/require a FU without advancing the cycle counter.
llvm-svn: 78736
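
For context, with this change an InstrStage whose cycle count is 0 reserves (or requires) its functional unit for the current cycle without advancing the itinerary's cycle counter. A minimal illustrative sketch of the resulting pattern, using the FU_Issue, FU_Pipe0/FU_Pipe1, and FU_LdSt0 units defined in the patch below (this snippet is not itself part of the commit):

    // The 0-cycle stage reserves FU_Issue for the current cycle only, so at
    // most one load/store can issue per cycle; the following 1-cycle stage
    // then issues in either pipeline, and the load/store unit is occupied
    // on the next cycle.
    InstrItinData<IIC_iLoad , [InstrStage<0, [FU_Issue]>,
                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                               InstrStage<1, [FU_LdSt0]>]>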
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--   llvm/lib/Target/ARM/ARM.td              2
-rw-r--r--   llvm/lib/Target/ARM/ARMSchedule.td     15
-rw-r--r--   llvm/lib/Target/ARM/ARMScheduleV6.td   16
-rw-r--r--   llvm/lib/Target/ARM/ARMScheduleV7.td   65
4 files changed, 59 insertions, 39 deletions
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index ce28149b54f..eb6304c448e 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -114,7 +114,7 @@ def : Processor<"arm1156t2f-s", V6Itineraries,
// V7 Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
[ArchV7A, FeatureThumb2, FeatureNEON, FeatureNEONFP]>;
-def : Processor<"cortex-a9", V7Itineraries,
+def : Processor<"cortex-a9", CortexA9Itineraries,
[ArchV7A, FeatureThumb2, FeatureNEON]>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/ARM/ARMSchedule.td b/llvm/lib/Target/ARM/ARMSchedule.td
index a5ca773ef1d..11a7b2a717a 100644
--- a/llvm/lib/Target/ARM/ARMSchedule.td
+++ b/llvm/lib/Target/ARM/ARMSchedule.td
@@ -10,8 +10,9 @@
//===----------------------------------------------------------------------===//
// Functional units across ARM processors
//
-def FU_Pipe0 : FuncUnit; // pipeline 0 issue
-def FU_Pipe1 : FuncUnit; // pipeline 1 issue
+def FU_Issue : FuncUnit; // issue
+def FU_Pipe0 : FuncUnit; // pipeline 0
+def FU_Pipe1 : FuncUnit; // pipeline 1
def FU_LdSt0 : FuncUnit; // pipeline 0 load/store
def FU_LdSt1 : FuncUnit; // pipeline 1 load/store
@@ -19,9 +20,11 @@ def FU_LdSt1 : FuncUnit; // pipeline 1 load/store
// Instruction Itinerary classes used for ARM
//
def IIC_iALU : InstrItinClass;
+def IIC_iMPY : InstrItinClass;
def IIC_iLoad : InstrItinClass;
def IIC_iStore : InstrItinClass;
def IIC_fpALU : InstrItinClass;
+def IIC_fpMPY : InstrItinClass;
def IIC_fpLoad : InstrItinClass;
def IIC_fpStore : InstrItinClass;
def IIC_Br : InstrItinClass;
@@ -31,12 +34,14 @@ def IIC_Br : InstrItinClass;
def GenericItineraries : ProcessorItineraries<[
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
- InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
- InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_fpALU , [InstrStage<1, [FU_Pipe0]>]>,
- InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>
+ InstrItinData<IIC_fpMPY , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
+ InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>
]>;
diff --git a/llvm/lib/Target/ARM/ARMScheduleV6.td b/llvm/lib/Target/ARM/ARMScheduleV6.td
index f0b8116a038..755547a678a 100644
--- a/llvm/lib/Target/ARM/ARMScheduleV6.td
+++ b/llvm/lib/Target/ARM/ARMScheduleV6.td
@@ -11,18 +11,16 @@
//
//===----------------------------------------------------------------------===//
+// TODO: this should model an ARM11
// Single issue pipeline so every itinerary starts with FU_pipe0
def V6Itineraries : ProcessorItineraries<[
- // single-cycle integer ALU
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>,
- // loads have an extra cycle of latency, but are fully pipelined
+ InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>,
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
- InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
- // fully-pipelined stores
InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
- InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>,
- // fp ALU is not pipelined
- InstrItinData<IIC_fpALU , [InstrStage<6, [FU_Pipe0]>]>,
- // no delay slots, so the latency of a branch is unimportant
- InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>
+ InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_fpALU , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_fpMPY , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
+ InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>
]>;
diff --git a/llvm/lib/Target/ARM/ARMScheduleV7.td b/llvm/lib/Target/ARM/ARMScheduleV7.td
index 30360bc9c41..8a7b42eb729 100644
--- a/llvm/lib/Target/ARM/ARMScheduleV7.td
+++ b/llvm/lib/Target/ARM/ARMScheduleV7.td
@@ -11,34 +11,51 @@
//
//===----------------------------------------------------------------------===//
-// Single issue pipeline so every itinerary starts with FU_Pipe0
-def V7Itineraries : ProcessorItineraries<[
- // single-cycle integer ALU
- InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>,
- // loads have an extra cycle of latency, but are fully pipelined
- InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
- InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
- // fully-pipelined stores
- InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
- InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>,
- // fp ALU is not pipelined
- InstrItinData<IIC_fpALU , [InstrStage<6, [FU_Pipe0]>]>,
- // no delay slots, so the latency of a branch is unimportant
- InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>
-]>;
-
// Dual issue pipeline so every itinerary starts with FU_Pipe0 | FU_Pipe1
def CortexA8Itineraries : ProcessorItineraries<[
- // single-cycle integer ALU
+ // two fully-pipelined integer ALU pipelines
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
+ // one fully-pipelined integer Multiply pipeline
+ // function units are used in alpha order, so use FU_Pipe1
+ // for the Multiply pipeline
+ InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe1]>]>,
// loads have an extra cycle of latency, but are fully pipelined
- InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>]>,
- InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>]>,
+ // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_iLoad , [InstrStage<0, [FU_Issue]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>]>,
// fully-pipelined stores
- InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
- InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
- // fp ALU is not pipelined
- InstrItinData<IIC_fpALU , [InstrStage<6, [FU_Pipe0, FU_Pipe1]>]>,
+ // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_iStore , [InstrStage<0, [FU_Issue]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
// no delay slots, so the latency of a branch is unimportant
- InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>
+ InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
+
+ // VFP ALU is not pipelined so stall all issues
+ // FIXME assume NFP pipeline and 7 cycle non-pipelined latency
+ InstrItinData<IIC_fpALU , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>,
+ // VFP MPY is not pipelined so stall all issues
+ // FIXME assume NFP pipeline and 7 cycle non-pipelined latency
+ InstrItinData<IIC_fpMPY , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>,
+ // loads have an extra cycle of latency, but are fully pipelined
+ // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpLoad , [InstrStage<0, [FU_Issue]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+ InstrStage<1, [FU_LdSt0]>]>,
+ // use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
+ InstrItinData<IIC_fpStore , [InstrStage<0, [FU_Issue]>,
+ InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>
+]>;
+
+// FIXME
+def CortexA9Itineraries : ProcessorItineraries<[
+ InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
+ InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_fpALU , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_fpMPY , [InstrStage<1, [FU_Pipe0]>]>,
+ InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
+ InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>
]>;