summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2010-10-19 00:55:07 +0000
committerEvan Cheng <evan.cheng@apple.com>2010-10-19 00:55:07 +0000
commit8249dfe6ced136a7143a805ed84af74ab4747010 (patch)
tree03c54b2774d78acaade03ae15d0ac0e274a04897 /llvm/lib/Target
parenta8807f93b715647b50fc0b85b3956abb62a2c348 (diff)
downloadbcm5719-llvm-8249dfe6ced136a7143a805ed84af74ab4747010.tar.gz
bcm5719-llvm-8249dfe6ced136a7143a805ed84af74ab4747010.zip
- Add a hook for target to determine whether an instruction def is
"long latency" enough to hoist even if it may increase spilling. Reloading a value from spill slot is often cheaper than performing an expensive computation in the loop. For X86, that means machine LICM will hoist SQRT, DIV, etc. ARM will be somewhat aggressive with VFP and NEON instructions. - Enable register pressure aware machine LICM by default. llvm-svn: 116781
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp20
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.h5
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp35
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.h5
4 files changed, 65 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index aca292abea5..0b5b2437abc 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1925,3 +1925,23 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
return getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
UseTID, UseIdx, UseAlign);
}
+
+bool ARMBaseInstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
+ unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
+ if (Subtarget.isCortexA8() &&
+ (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
+ // CortexA8 VFP instructions are not pipelined.
+ return true;
+
+ // Hoist VFP / NEON instructions with 4 or higher latency.
+ int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+ if (Latency <= 3)
+ return false;
+ return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
+ UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
+}
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 36be3366d29..b3a832948f0 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -377,6 +377,11 @@ private:
unsigned DefIdx, unsigned DefAlign,
const TargetInstrDesc &UseTID,
unsigned UseIdx, unsigned UseAlign) const;
+
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
};
static inline
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 40ef3dbd0d7..79d9872a1b2 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3152,6 +3152,41 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
+bool X86InstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ switch (DefMI->getOpcode()) {
+ default: return false;
+ case X86::DIVSDrm:
+ case X86::DIVSDrm_Int:
+ case X86::DIVSDrr:
+ case X86::DIVSDrr_Int:
+ case X86::DIVSSrm:
+ case X86::DIVSSrm_Int:
+ case X86::DIVSSrr:
+ case X86::DIVSSrr_Int:
+ case X86::SQRTPDm:
+ case X86::SQRTPDm_Int:
+ case X86::SQRTPDr:
+ case X86::SQRTPDr_Int:
+ case X86::SQRTPSm:
+ case X86::SQRTPSm_Int:
+ case X86::SQRTPSr:
+ case X86::SQRTPSr_Int:
+ case X86::SQRTSDm:
+ case X86::SQRTSDm_Int:
+ case X86::SQRTSDr:
+ case X86::SQRTSDr_Int:
+ case X86::SQRTSSm:
+ case X86::SQRTSSm_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ return true;
+ }
+}
+
namespace {
/// CGBR - Create Global Base Reg pass. This initializes the PIC
/// global base register for x86-32.
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index e43cfacae50..5060ad836af 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -864,6 +864,11 @@ public:
unsigned OpNum,
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Alignment) const;
+
+ bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const;
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
OpenPOWER on IntegriCloud