Make use of new reserved/required scheduling stuff: introduce VFP and NEON locks to model domain cross stalls precisly.

llvm-svn: 100646
author: Anton Korobeynikov <asl@math.spbu.ru> 2010-04-07 18:19:40 +0000
committer: Anton Korobeynikov <asl@math.spbu.ru> 2010-04-07 18:19:40 +0000
commit: baeb210be780995bee7e9bb00208bfc40027fe25 (patch)
tree: 3b709844bde4ec7ea0cbcbcd7edb1e2acad77ee9 /llvm/lib
parent: 0bdc6345e8525788412057f39bc07febbb5e244b (diff)
download: bcm5719-llvm-baeb210be780995bee7e9bb00208bfc40027fe25.tar.gz
bcm5719-llvm-baeb210be780995bee7e9bb00208bfc40027fe25.zip
2 files changed, 77 insertions, 22 deletions
diff --git a/llvm/lib/Target/ARM/ARMSchedule.td b/llvm/lib/Target/ARM/ARMSchedule.td
index fc4c5f5830b..db15a85e40f 100644
--- a/llvm/lib/Target/ARM/ARMSchedule.td
+++ b/llvm/lib/Target/ARM/ARMSchedule.td
@@ -17,6 +17,8 @@ def FU_LdSt0   : FuncUnit; // pipeline 0 load/store
 def FU_LdSt1   : FuncUnit; // pipeline 1 load/store
 def FU_NPipe   : FuncUnit; // NEON ALU/MUL pipe
 def FU_NLSPipe : FuncUnit; // NEON LS pipe
+def FU_DRegsVFP: FuncUnit; // FP register set, VFP side
+def FU_DRegsN  : FuncUnit; // FP register set, NEON side
 
 //===----------------------------------------------------------------------===//
 // Instruction Itinerary classes used for ARM
diff --git a/llvm/lib/Target/ARM/ARMScheduleV7.td b/llvm/lib/Target/ARM/ARMScheduleV7.td
index b121045dd5b..0d7a5539c1d 100644
--- a/llvm/lib/Target/ARM/ARMScheduleV7.td
+++ b/llvm/lib/Target/ARM/ARMScheduleV7.td
@@ -593,94 +593,147 @@ def CortexA8Itineraries : ProcessorItineraries<[
 // Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1
 //
 def CortexA9Itineraries : ProcessorItineraries<[
+  // VFP and NEON shares the same register file. This means that every VFP
+  // instruction should wait for full completion of the consecutive NEON
+  // instruction and vice-versa. We model this behavior with two artificial FUs:
+  // DRegsVFP and DRegsVFP.
+  //
+  // Every VFP instruction:
+  //  - Acquires DRegsVFP resource for 1 cycle
+  //  - Reserves DRegsN resource for the whole duration.
+  // Every NEON instruction does the same but with FUs swapped.
+  //
+  // Since the reserved FU cannot be acquired this models precisly "cross-domain"
+  // stalls.
 
   // VFP
   // Issue through integer pipeline, and execute in NEON unit.
-  //
 
   // FP Special Register to Integer Register File Move
-  InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpSTAT , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                              InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                              InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                               InstrStage<1, [FU_NPipe]>]>,
   //
   // Single-precision FP Unary
-  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpUNA32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
   //
   // Double-precision FP Unary
-  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpUNA64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
 
   //
   // Single-precision FP Compare
-  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCMP32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
   //
   // Double-precision FP Compare
-  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCMP64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<2, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [1, 1]>,
   //
   // Single to Double FP Convert
-  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTSD , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Double to Single FP Convert
-  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTDS , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Single-Precision FP to Integer Convert
-  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTSI , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Double-Precision FP to Integer Convert
-  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTDI , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Integer to Single-Precision FP Convert
-  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTIS , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Integer to Double-Precision FP Convert
-  InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpCVTID , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1]>,
   //
   // Single-precision FP ALU
-  InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpALU32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
   //
   // Double-precision FP ALU
-  InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpALU64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<5, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [4, 1, 1]>,
   //
   // Single-precision FP Multiply
-  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpMUL32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<6, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [5, 1, 1]>,
   //
   // Double-precision FP Multiply
-  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpMUL64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<7, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [6, 1, 1]>,
   //
   // Single-precision FP MAC
-  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpMAC32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<9, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<1, [FU_NPipe]>], [8, 0, 1, 1]>,
   //
   // Double-precision FP MAC
-  InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpMAC64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<10, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<2, [FU_NPipe]>], [9, 0, 1, 1]>,
   //
   // Single-precision FP DIV
-  InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpDIV32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<16, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<10, [FU_NPipe]>], [15, 1, 1]>,
   //
   // Double-precision FP DIV
-  InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpDIV64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<26, [FU_DRegsN],  0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<20, [FU_NPipe]>], [25, 1, 1]>,
   //
   // Single-precision FP SQRT
-  InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpSQRT32, [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<18, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<13, [FU_NPipe]>], [17, 1]>,
   //
   // Double-precision FP SQRT
-  InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
+  InstrItinData<IIC_fpSQRT64, [InstrStage2<1, [FU_DRegsVFP], 0, Required>,
+                               InstrStage2<33, [FU_DRegsN],   0, Reserved>,
+                               InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
                                InstrStage<28, [FU_NPipe]>], [32, 1]>
 ]>;
author	Anton Korobeynikov <asl@math.spbu.ru>	2010-04-07 18:19:40 +0000
committer	Anton Korobeynikov <asl@math.spbu.ru>	2010-04-07 18:19:40 +0000
commit	baeb210be780995bee7e9bb00208bfc40027fe25 (patch)
tree	3b709844bde4ec7ea0cbcbcd7edb1e2acad77ee9 /llvm/lib
parent	0bdc6345e8525788412057f39bc07febbb5e244b (diff)
download	bcm5719-llvm-baeb210be780995bee7e9bb00208bfc40027fe25.tar.gz bcm5719-llvm-baeb210be780995bee7e9bb00208bfc40027fe25.zip