summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/X86
diff options
context:
space:
mode:
author Craig Topper <craig.topper@gmail.com> 2011-11-15 05:55:35 +0000
committer Craig Topper <craig.topper@gmail.com> 2011-11-15 05:55:35 +0000
commit 05baa85f58b741f566831f4a29487368d59ad053 (patch)
tree dbaea1d75f31774c0cad5cc5ab843c3b26c97f9c /llvm/lib/Target/X86
parent f01faac473730ab72c44b88f23f68d5dbf859ab5 (diff)
download bcm5719-llvm-05baa85f58b741f566831f4a29487368d59ad053.tar.gz
bcm5719-llvm-05baa85f58b741f566831f4a29487368d59ad053.zip
Properly qualify AVX2 specific parts of execution dependency table. Also enable converting between 256-bit PS/PD operations when AVX1 is enabled. Fixes PR11370.
llvm-svn: 144622
Diffstat (limited to 'llvm/lib/Target/X86')
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.cpp 20
-rw-r--r-- llvm/lib/Target/X86/X86TargetMachine.cpp 5
2 files changed, 16 insertions, 9 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 9428fffae88..9f7b21f6529 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3555,7 +3555,11 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
{ X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
{ X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
- { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
+ { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }
+};
+
+static const unsigned ReplaceableInstrsAVX2[][3] = {
+ //PackedSingle PackedDouble PackedInt
{ X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm },
{ X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr },
{ X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDYrm },
@@ -3563,31 +3567,37 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm },
{ X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr },
{ X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm },
- { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr },
+ { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }
};
// FIXME: Some shuffle and unpack instructions have equivalents in different
// domains, but they require a bit more work than just switching opcodes.
-static const unsigned *lookup(unsigned opcode, unsigned domain) {
+static const unsigned *lookup(unsigned opcode, unsigned domain, bool hasAVX2) {
for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
if (ReplaceableInstrs[i][domain-1] == opcode)
return ReplaceableInstrs[i];
+ if (domain != 3 || hasAVX2) // only use PackedInt domain if AVX2 is enabled
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
+ if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
+ return ReplaceableInstrsAVX2[i];
return 0;
}
std::pair<uint16_t, uint16_t>
X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const {
uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ bool hasAVX2 = TM.getSubtarget<X86Subtarget>().hasAVX2();
return std::make_pair(domain,
- domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
+ domain && lookup(MI->getOpcode(), domain, hasAVX2) ? 0xe : 0);
}
void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
assert(Domain>0 && Domain<4 && "Invalid execution domain");
uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
assert(dom && "Not an SSE instruction");
- const unsigned *table = lookup(MI->getOpcode(), dom);
+ bool hasAVX2 = TM.getSubtarget<X86Subtarget>().hasAVX2();
+ const unsigned *table = lookup(MI->getOpcode(), dom, hasAVX2);
assert(table && "Cannot change domain");
MI->setDesc(get(table[Domain-1]));
}
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 4d4d7c06ab9..feb71555c46 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -140,10 +140,7 @@ bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass));
ShouldPrint = true;
}
- if (Subtarget.hasAVX2()) {
- // FIXME this should be turned on for just AVX, but the pass doesn't check
- // that instructions are valid before replacing them and there are AVX2
- // integer instructions in the table.
+ if (Subtarget.hasAVX()) {
PM.add(createExecutionDependencyFixPass(&X86::VR256RegClass));
ShouldPrint = true;
}
OpenPOWER on IntegriCloud