diff options
| author | Anton Korobeynikov <asl@math.spbu.ru> | 2010-04-07 18:19:40 +0000 |
|---|---|---|
| committer | Anton Korobeynikov <asl@math.spbu.ru> | 2010-04-07 18:19:40 +0000 |
| commit | baeb210be780995bee7e9bb00208bfc40027fe25 (patch) | |
| tree | 3b709844bde4ec7ea0cbcbcd7edb1e2acad77ee9 /llvm/lib | |
| parent | 0bdc6345e8525788412057f39bc07febbb5e244b (diff) | |
| download | bcm5719-llvm-baeb210be780995bee7e9bb00208bfc40027fe25.tar.gz bcm5719-llvm-baeb210be780995bee7e9bb00208bfc40027fe25.zip | |
Make use of new reserved/required scheduling stuff: introduce VFP and NEON locks to model cross-domain stalls precisely.
llvm-svn: 100646
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMSchedule.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/ARM/ARMScheduleV7.td | 97 |
2 files changed, 77 insertions, 22 deletions
diff --git a/llvm/lib/Target/ARM/ARMSchedule.td b/llvm/lib/Target/ARM/ARMSchedule.td index fc4c5f5830b..db15a85e40f 100644 --- a/llvm/lib/Target/ARM/ARMSchedule.td +++ b/llvm/lib/Target/ARM/ARMSchedule.td @@ -17,6 +17,8 @@ def FU_LdSt0 : FuncUnit; // pipeline 0 load/store def FU_LdSt1 : FuncUnit; // pipeline 1 load/store def FU_NPipe : FuncUnit; // NEON ALU/MUL pipe def FU_NLSPipe : FuncUnit; // NEON LS pipe +def FU_DRegsVFP: FuncUnit; // FP register set, VFP side +def FU_DRegsN : FuncUnit; // FP register set, NEON side //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for ARM diff --git a/llvm/lib/Target/ARM/ARMScheduleV7.td b/llvm/lib/Target/ARM/ARMScheduleV7.td index b121045dd5b..0d7a5539c1d 100644 --- a/llvm/lib/Target/ARM/ARMScheduleV7.td +++ b/llvm/lib/Target/ARM/ARMScheduleV7.td @@ -593,94 +593,147 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Dual issue pipeline represented by FU_Pipe0 | FU_Pipe1 // def CortexA9Itineraries : ProcessorItineraries<[ + // VFP and NEON share the same register file. This means that every VFP + // instruction should wait for full completion of the consecutive NEON + // instruction and vice-versa. We model this behavior with two artificial FUs: + // DRegsVFP and DRegsN. + // + // Every VFP instruction: + // - Acquires DRegsVFP resource for 1 cycle + // - Reserves DRegsN resource for the whole duration. + // Every NEON instruction does the same but with FUs swapped. + // + // Since the reserved FU cannot be acquired this models precisely "cross-domain" + // stalls. // VFP // Issue through integer pipeline, and execute in NEON unit. 
- // // FP Special Register to Integer Register File Move - InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpSTAT , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<2, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>]>, // // Single-precision FP Unary - InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpUNA32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<2, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [1, 1]>, // // Double-precision FP Unary - InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpUNA64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<2, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [1, 1]>, // // Single-precision FP Compare - InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpCMP32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<2, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [1, 1]>, // // Double-precision FP Compare - InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpCMP64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<2, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [1, 1]>, // // Single to Double FP Convert - InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpCVTSD , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Double to Single FP Convert - InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpCVTDS , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<5, [FU_DRegsN], 
0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Single-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpCVTSI , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Double-Precision FP to Integer Convert - InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpCVTDI , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Integer to Single-Precision FP Convert - InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpCVTIS , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Integer to Double-Precision FP Convert - InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpCVTID , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1]>, // // Single-precision FP ALU - InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpALU32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, // // Double-precision FP ALU - InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpALU64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<5, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [4, 1, 1]>, // // Single-precision FP Multiply - InstrItinData<IIC_fpMUL32 , [InstrStage<1, 
[FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpMUL32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<6, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [5, 1, 1]>, // // Double-precision FP Multiply - InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpMUL64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<7, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<2, [FU_NPipe]>], [6, 1, 1]>, // // Single-precision FP MAC - InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpMAC32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<9, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_NPipe]>], [8, 0, 1, 1]>, // // Double-precision FP MAC - InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpMAC64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<10, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<2, [FU_NPipe]>], [9, 0, 1, 1]>, // // Single-precision FP DIV - InstrItinData<IIC_fpDIV32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpDIV32 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<16, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<10, [FU_NPipe]>], [15, 1, 1]>, // // Double-precision FP DIV - InstrItinData<IIC_fpDIV64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpDIV64 , [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<26, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<20, [FU_NPipe]>], [25, 1, 1]>, // // Single-precision FP SQRT - InstrItinData<IIC_fpSQRT32, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpSQRT32, [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<18, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<13, 
[FU_NPipe]>], [17, 1]>, // // Double-precision FP SQRT - InstrItinData<IIC_fpSQRT64, [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, + InstrItinData<IIC_fpSQRT64, [InstrStage2<1, [FU_DRegsVFP], 0, Required>, + InstrStage2<33, [FU_DRegsN], 0, Reserved>, + InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<28, [FU_NPipe]>], [32, 1]> ]>; |

