summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86InstrInfo.h
diff options
context:
space:
mode:
authorJakob Stoklund Olesen <stoklund@2pi.dk>2010-03-25 17:25:00 +0000
committerJakob Stoklund Olesen <stoklund@2pi.dk>2010-03-25 17:25:00 +0000
commit49e121d5e42c5d7429805d7a5a0d1f40b91edecf (patch)
treef3c5a65fadc46e6e27b4c610abf56eb74b79ce6f /llvm/lib/Target/X86/X86InstrInfo.h
parent3f540c0d7d4eb7a8bb42f19e646ad5c79ecffc00 (diff)
downloadbcm5719-llvm-49e121d5e42c5d7429805d7a5a0d1f40b91edecf.tar.gz
bcm5719-llvm-49e121d5e42c5d7429805d7a5a0d1f40b91edecf.zip
Add a late SSEDomainFix pass that twiddles SSE instructions to avoid domain crossings.
On Nehalem and newer CPUs there is a 2 cycle latency penalty on using a register in a different domain than where it was defined. Some instructions have equivalents for different domains, like por/orps/orpd. The SSEDomainFix pass tries to minimize the number of domain crossings by changing between equivalent opcodes where possible. This is a work in progress, in particular the pass doesn't do anything yet. SSE instructions are tagged with their execution domain in TableGen using the last two bits of TSFlags. Note that not all instructions are tagged correctly. Life just isn't that simple. The SSE execution domain issue is very similar to the ARM NEON/VFP pipeline issue handled by NEONMoveFixPass. This pass may become target independent to handle both. llvm-svn: 99524
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrInfo.h')
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.h16
1 files changed, 14 insertions, 2 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 5111719a209..965740dcaf0 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -398,7 +398,10 @@ namespace X86II {
FS = 1 << SegOvrShift,
GS = 2 << SegOvrShift,
- // Bits 22 -> 23 are unused
+ // Execution domain for SSE instructions in bits 22, 23.
+ // 0 in bits 22-23 means normal, non-SSE instruction. See SSEDomain below.
+ SSEDomainShift = 22,
+
OpcodeShift = 24,
OpcodeMask = 0xFF << OpcodeShift
};
@@ -486,7 +489,7 @@ class X86InstrInfo : public TargetInstrInfoImpl {
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
-
+
public:
explicit X86InstrInfo(X86TargetMachine &tm);
@@ -716,6 +719,15 @@ public:
///
unsigned getGlobalBaseReg(MachineFunction *MF) const;
+ /// Some SSE instructions come in variants for three domains.
+ enum SSEDomain { NotSSEDomain, PackedInt, PackedSingle, PackedDouble };
+
+ /// GetSSEDomain - Return the SSE execution domain of MI, or NotSSEDomain for
+ /// unknown instructions. If the instruction has equivalents for other
+ /// domains, equiv points to a list of opcodes for [PackedInt, PackedSingle,
+ /// PackedDouble].
+ SSEDomain GetSSEDomain(const MachineInstr *MI, const unsigned *&equiv) const;
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
OpenPOWER on IntegriCloud