diff options
author | Craig Topper <craig.topper@gmail.com> | 2017-02-11 05:32:57 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2017-02-11 05:32:57 +0000 |
commit | 464b8cb24427478bdcee5d8bb338fb219d0751ed (patch) | |
tree | f50e506887b572293f740032089bcb968c4dcd30 /llvm/lib/Target/X86/X86InstrInfo.cpp | |
parent | a67cf0001f5d24305f6c443da05266781c94de2e (diff) | |
download | bcm5719-llvm-464b8cb24427478bdcee5d8bb338fb219d0751ed.tar.gz bcm5719-llvm-464b8cb24427478bdcee5d8bb338fb219d0751ed.zip |
[X86] Don't base domain decisions on VEXTRACTF128/VINSERTF128 if only AVX1 is available.
It seems the execution dependency pass prefers to use FP instructions, even when most of the consuming code is integer, if a vextractf128 instruction produced the register. Without AVX2, the corresponding integer instruction is not available.
This patch reports GenericDomain for these instructions when AVX2 is not supported, so that they are ignored by domain fixing. If AVX2 is supported, we report the correct domain and allow them to switch between integer and FP.
Overall I think this produces better results in the modified test cases.
llvm-svn: 294824
Diffstat (limited to 'llvm/lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.cpp | 23 |
1 file changed, 19 insertions, 4 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 716500a20b0..f6fac236368 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -8863,10 +8863,6 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = { { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr }, { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm }, { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }, - { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr }, - { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr }, - { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm }, - { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr }, { X86::VPERM2F128rm, X86::VPERM2F128rm, X86::VPERM2I128rm }, { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VPERM2I128rr }, { X86::VBROADCASTSSrm, X86::VBROADCASTSSrm, X86::VPBROADCASTDrm}, @@ -8878,6 +8874,14 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = { { X86::VBROADCASTF128, X86::VBROADCASTF128, X86::VBROADCASTI128 }, }; +static const uint16_t ReplaceableInstrsAVX2InsertExtract[][3] = { + //PackedSingle PackedDouble PackedInt + { X86::VEXTRACTF128mr, X86::VEXTRACTF128mr, X86::VEXTRACTI128mr }, + { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VEXTRACTI128rr }, + { X86::VINSERTF128rm, X86::VINSERTF128rm, X86::VINSERTI128rm }, + { X86::VINSERTF128rr, X86::VINSERTF128rr, X86::VINSERTI128rr }, +}; + static const uint16_t ReplaceableInstrsAVX512[][4] = { // Two integer columns for 64-bit and 32-bit elements. //PackedSingle PackedDouble PackedInt PackedInt @@ -9139,6 +9143,12 @@ X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const { validDomains = 0xe; } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) { validDomains = Subtarget.hasAVX2() ? 0xe : 0x6; + } else if (lookup(opcode, domain, ReplaceableInstrsAVX2InsertExtract)) { + // Insert/extract instructions should only effect domain if AVX2 + // is enabled. 
+ if (!Subtarget.hasAVX2()) + return std::make_pair(0, 0); + validDomains = 0xe; } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) { validDomains = 0xe; } else if (Subtarget.hasDQI() && lookupAVX512(opcode, domain, @@ -9167,6 +9177,11 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const { "256-bit vector operations only available in AVX2"); table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2); } + if (!table) { // try the other table + assert(Subtarget.hasAVX2() && + "256-bit insert/extract only available in AVX2"); + table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2InsertExtract); + } if (!table) { // try the AVX512 table assert(Subtarget.hasAVX512() && "Requires AVX-512"); table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512); |