diff options
Diffstat (limited to 'llvm/lib/Target/PowerPC')
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 33 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCScheduleP7.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCScheduleP8.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp | 10 |
5 files changed, 55 insertions, 0 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 696a83860e5..bf6e4029640 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -57,6 +57,10 @@ static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy", cl::desc("Causes the backend to crash instead of generating a nop VSX copy"), cl::Hidden); +static cl::opt<bool> +UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden, + cl::desc("Use the old (incorrect) instruction latency calculation")); + // Pin the vtable to this file. void PPCInstrInfo::anchor() {} @@ -103,6 +107,35 @@ PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, return new ScoreboardHazardRecognizer(II, DAG); } +unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost) const { + if (!ItinData || UseOldLatencyCalc) + return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost); + + // The default implementation of getInstrLatency calls getStageLatency, but + // getStageLatency does not do the right thing for us. While we have + // itineraries, most cores are fully pipelined, and so the itineraries only + // express the first part of the pipeline, not every stage. Instead, we need + // to use the listed output operand cycle numbers (the maximum over all + // explicit register defs, with a minimum latency of one cycle). 
+ + unsigned Latency = 1; + unsigned DefClass = MI->getDesc().getSchedClass(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || MO.isImplicit()) + continue; + + int Cycle = ItinData->getOperandCycle(DefClass, i); + if (Cycle < 0) + continue; + + Latency = std::max(Latency, (unsigned) Cycle); + } + + return Latency; +} int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h index e2d6346aa53..40badae644d 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -95,6 +95,10 @@ public: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const override; + unsigned getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost = nullptr) const override; + int getOperandLatency(const InstrItineraryData *ItinData, const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *UseMI, diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/llvm/lib/Target/PowerPC/PPCScheduleP7.td index 635d154d10b..267f5672618 100644 --- a/llvm/lib/Target/PowerPC/PPCScheduleP7.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP7.td @@ -315,6 +315,10 @@ def P7Itineraries : ProcessorItineraries< P7_DU3, P7_DU4], 0>, InstrStage<1, [P7_VS1, P7_VS2]>], [5, 1, 1]>, + InstrItinData<IIC_FPAddSub , [InstrStage<1, [P7_DU1, P7_DU2, + P7_DU3, P7_DU4], 0>, + InstrStage<1, [P7_VS1, P7_VS2]>], + [5, 1, 1]>, InstrItinData<IIC_FPCompare , [InstrStage<1, [P7_DU1, P7_DU2, P7_DU3, P7_DU4], 0>, InstrStage<1, [P7_VS1, P7_VS2]>], diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP8.td b/llvm/lib/Target/PowerPC/PPCScheduleP8.td index 020739baec3..69e6d05c660 100644 --- a/llvm/lib/Target/PowerPC/PPCScheduleP8.td +++ b/llvm/lib/Target/PowerPC/PPCScheduleP8.td @@ 
-323,6 +323,10 @@ def P8Itineraries : ProcessorItineraries< P8_DU4, P8_DU5, P8_DU6], 0>, InstrStage<1, [P8_FPU1, P8_FPU2]>], [5, 1, 1]>, + InstrItinData<IIC_FPAddSub , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, + P8_DU4, P8_DU5, P8_DU6], 0>, + InstrStage<1, [P8_FPU1, P8_FPU2]>], + [5, 1, 1]>, InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6], 0>, InstrStage<1, [P8_FPU1, P8_FPU2]>], diff --git a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index f352fa647ac..58d3c3d3fa2 100644 --- a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -136,6 +136,16 @@ protected: // source of the copy, it must still be live here. We can't use // interval testing for a physical register, so as long as we're // walking the MIs we may as well test liveness here. + // + // FIXME: There is a case that occurs in practice, like this: + // %vreg9<def> = COPY %F1; VSSRC:%vreg9 + // ... + // %vreg6<def> = COPY %vreg9; VSSRC:%vreg6,%vreg9 + // %vreg7<def> = COPY %vreg9; VSSRC:%vreg7,%vreg9 + // %vreg9<def,tied1> = XSMADDASP %vreg9<tied0>, %vreg1, %vreg4; VSSRC: + // %vreg6<def,tied1> = XSMADDASP %vreg6<tied0>, %vreg1, %vreg2; VSSRC: + // %vreg7<def,tied1> = XSMADDASP %vreg7<tied0>, %vreg1, %vreg3; VSSRC: + // which prevents an otherwise-profitable transformation. bool OtherUsers = false, KillsAddendSrc = false; for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI); J != JE; --J) { |