summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2019-09-07 21:54:40 +0000
committerCraig Topper <craig.topper@intel.com>2019-09-07 21:54:40 +0000
commit1829a09bea15d44053a1adc72e34fab367779c49 (patch)
tree4c801c2b32d183f1071d1656733631c30e9bc158 /llvm/lib/Target
parenta461c26dd85e864385b4143390c20728e424dc95 (diff)
downloadbcm5719-llvm-1829a09bea15d44053a1adc72e34fab367779c49.tar.gz
bcm5719-llvm-1829a09bea15d44053a1adc72e34fab367779c49.zip
[X86] Add support for unfold broadcast loads from FMA instructions.
llvm-svn: 371323
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86InstrFoldTables.cpp121
1 files changed, 121 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 4f98fdf5ea4..63474f9e504 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -5308,6 +5308,117 @@ static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = {
{ X86::VSUBPSZrr, X86::VSUBPSZrmb, TB_BCAST_SS },
};
+static const X86MemoryFoldTableEntry BroadcastFoldTable3[] = { // FMA register opcodes paired with their broadcast-load (mb) opcodes; registered for unfolding with TB_INDEX_3. Must stay sorted and unique (asserted in lookupFoldTableImpl).
+ { X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128mb, TB_BCAST_SD }, // each entry: { register opcode, broadcast-memory opcode, TB_BCAST_SD for PD / TB_BCAST_SS for PS }
+ { X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256mb, TB_BCAST_SD },
+ { X86::VFMADD132PDZr, X86::VFMADD132PDZmb, TB_BCAST_SD },
+ { X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128mb, TB_BCAST_SS },
+ { X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256mb, TB_BCAST_SS },
+ { X86::VFMADD132PSZr, X86::VFMADD132PSZmb, TB_BCAST_SS },
+ { X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128mb, TB_BCAST_SD },
+ { X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256mb, TB_BCAST_SD },
+ { X86::VFMADD213PDZr, X86::VFMADD213PDZmb, TB_BCAST_SD },
+ { X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128mb, TB_BCAST_SS },
+ { X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256mb, TB_BCAST_SS },
+ { X86::VFMADD213PSZr, X86::VFMADD213PSZmb, TB_BCAST_SS },
+ { X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128mb, TB_BCAST_SD },
+ { X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256mb, TB_BCAST_SD },
+ { X86::VFMADD231PDZr, X86::VFMADD231PDZmb, TB_BCAST_SD },
+ { X86::VFMADD231PSZ128r, X86::VFMADD231PSZ128mb, TB_BCAST_SS },
+ { X86::VFMADD231PSZ256r, X86::VFMADD231PSZ256mb, TB_BCAST_SS },
+ { X86::VFMADD231PSZr, X86::VFMADD231PSZmb, TB_BCAST_SS },
+ { X86::VFMADDSUB132PDZ128r, X86::VFMADDSUB132PDZ128mb, TB_BCAST_SD },
+ { X86::VFMADDSUB132PDZ256r, X86::VFMADDSUB132PDZ256mb, TB_BCAST_SD },
+ { X86::VFMADDSUB132PDZr, X86::VFMADDSUB132PDZmb, TB_BCAST_SD },
+ { X86::VFMADDSUB132PSZ128r, X86::VFMADDSUB132PSZ128mb, TB_BCAST_SS },
+ { X86::VFMADDSUB132PSZ256r, X86::VFMADDSUB132PSZ256mb, TB_BCAST_SS },
+ { X86::VFMADDSUB132PSZr, X86::VFMADDSUB132PSZmb, TB_BCAST_SS },
+ { X86::VFMADDSUB213PDZ128r, X86::VFMADDSUB213PDZ128mb, TB_BCAST_SD },
+ { X86::VFMADDSUB213PDZ256r, X86::VFMADDSUB213PDZ256mb, TB_BCAST_SD },
+ { X86::VFMADDSUB213PDZr, X86::VFMADDSUB213PDZmb, TB_BCAST_SD },
+ { X86::VFMADDSUB213PSZ128r, X86::VFMADDSUB213PSZ128mb, TB_BCAST_SS },
+ { X86::VFMADDSUB213PSZ256r, X86::VFMADDSUB213PSZ256mb, TB_BCAST_SS },
+ { X86::VFMADDSUB213PSZr, X86::VFMADDSUB213PSZmb, TB_BCAST_SS },
+ { X86::VFMADDSUB231PDZ128r, X86::VFMADDSUB231PDZ128mb, TB_BCAST_SD },
+ { X86::VFMADDSUB231PDZ256r, X86::VFMADDSUB231PDZ256mb, TB_BCAST_SD },
+ { X86::VFMADDSUB231PDZr, X86::VFMADDSUB231PDZmb, TB_BCAST_SD },
+ { X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128mb, TB_BCAST_SS },
+ { X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256mb, TB_BCAST_SS },
+ { X86::VFMADDSUB231PSZr, X86::VFMADDSUB231PSZmb, TB_BCAST_SS },
+ { X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128mb, TB_BCAST_SD },
+ { X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256mb, TB_BCAST_SD },
+ { X86::VFMSUB132PDZr, X86::VFMSUB132PDZmb, TB_BCAST_SD },
+ { X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128mb, TB_BCAST_SS },
+ { X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256mb, TB_BCAST_SS },
+ { X86::VFMSUB132PSZr, X86::VFMSUB132PSZmb, TB_BCAST_SS },
+ { X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128mb, TB_BCAST_SD },
+ { X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256mb, TB_BCAST_SD },
+ { X86::VFMSUB213PDZr, X86::VFMSUB213PDZmb, TB_BCAST_SD },
+ { X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128mb, TB_BCAST_SS },
+ { X86::VFMSUB213PSZ256r, X86::VFMSUB213PSZ256mb, TB_BCAST_SS },
+ { X86::VFMSUB213PSZr, X86::VFMSUB213PSZmb, TB_BCAST_SS },
+ { X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128mb, TB_BCAST_SD },
+ { X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256mb, TB_BCAST_SD },
+ { X86::VFMSUB231PDZr, X86::VFMSUB231PDZmb, TB_BCAST_SD },
+ { X86::VFMSUB231PSZ128r, X86::VFMSUB231PSZ128mb, TB_BCAST_SS },
+ { X86::VFMSUB231PSZ256r, X86::VFMSUB231PSZ256mb, TB_BCAST_SS },
+ { X86::VFMSUB231PSZr, X86::VFMSUB231PSZmb, TB_BCAST_SS },
+ { X86::VFMSUBADD132PDZ128r, X86::VFMSUBADD132PDZ128mb, TB_BCAST_SD },
+ { X86::VFMSUBADD132PDZ256r, X86::VFMSUBADD132PDZ256mb, TB_BCAST_SD },
+ { X86::VFMSUBADD132PDZr, X86::VFMSUBADD132PDZmb, TB_BCAST_SD },
+ { X86::VFMSUBADD132PSZ128r, X86::VFMSUBADD132PSZ128mb, TB_BCAST_SS },
+ { X86::VFMSUBADD132PSZ256r, X86::VFMSUBADD132PSZ256mb, TB_BCAST_SS },
+ { X86::VFMSUBADD132PSZr, X86::VFMSUBADD132PSZmb, TB_BCAST_SS },
+ { X86::VFMSUBADD213PDZ128r, X86::VFMSUBADD213PDZ128mb, TB_BCAST_SD },
+ { X86::VFMSUBADD213PDZ256r, X86::VFMSUBADD213PDZ256mb, TB_BCAST_SD },
+ { X86::VFMSUBADD213PDZr, X86::VFMSUBADD213PDZmb, TB_BCAST_SD },
+ { X86::VFMSUBADD213PSZ128r, X86::VFMSUBADD213PSZ128mb, TB_BCAST_SS },
+ { X86::VFMSUBADD213PSZ256r, X86::VFMSUBADD213PSZ256mb, TB_BCAST_SS },
+ { X86::VFMSUBADD213PSZr, X86::VFMSUBADD213PSZmb, TB_BCAST_SS },
+ { X86::VFMSUBADD231PDZ128r, X86::VFMSUBADD231PDZ128mb, TB_BCAST_SD },
+ { X86::VFMSUBADD231PDZ256r, X86::VFMSUBADD231PDZ256mb, TB_BCAST_SD },
+ { X86::VFMSUBADD231PDZr, X86::VFMSUBADD231PDZmb, TB_BCAST_SD },
+ { X86::VFMSUBADD231PSZ128r, X86::VFMSUBADD231PSZ128mb, TB_BCAST_SS },
+ { X86::VFMSUBADD231PSZ256r, X86::VFMSUBADD231PSZ256mb, TB_BCAST_SS },
+ { X86::VFMSUBADD231PSZr, X86::VFMSUBADD231PSZmb, TB_BCAST_SS },
+ { X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128mb, TB_BCAST_SD },
+ { X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256mb, TB_BCAST_SD },
+ { X86::VFNMADD132PDZr, X86::VFNMADD132PDZmb, TB_BCAST_SD },
+ { X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128mb, TB_BCAST_SS },
+ { X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256mb, TB_BCAST_SS },
+ { X86::VFNMADD132PSZr, X86::VFNMADD132PSZmb, TB_BCAST_SS },
+ { X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128mb, TB_BCAST_SD },
+ { X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256mb, TB_BCAST_SD },
+ { X86::VFNMADD213PDZr, X86::VFNMADD213PDZmb, TB_BCAST_SD },
+ { X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128mb, TB_BCAST_SS },
+ { X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256mb, TB_BCAST_SS },
+ { X86::VFNMADD213PSZr, X86::VFNMADD213PSZmb, TB_BCAST_SS },
+ { X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128mb, TB_BCAST_SD },
+ { X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256mb, TB_BCAST_SD },
+ { X86::VFNMADD231PDZr, X86::VFNMADD231PDZmb, TB_BCAST_SD },
+ { X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128mb, TB_BCAST_SS },
+ { X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256mb, TB_BCAST_SS },
+ { X86::VFNMADD231PSZr, X86::VFNMADD231PSZmb, TB_BCAST_SS },
+ { X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128mb, TB_BCAST_SD },
+ { X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256mb, TB_BCAST_SD },
+ { X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZmb, TB_BCAST_SD },
+ { X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128mb, TB_BCAST_SS },
+ { X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256mb, TB_BCAST_SS },
+ { X86::VFNMSUB132PSZr, X86::VFNMSUB132PSZmb, TB_BCAST_SS },
+ { X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128mb, TB_BCAST_SD },
+ { X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256mb, TB_BCAST_SD },
+ { X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZmb, TB_BCAST_SD },
+ { X86::VFNMSUB213PSZ128r, X86::VFNMSUB213PSZ128mb, TB_BCAST_SS },
+ { X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256mb, TB_BCAST_SS },
+ { X86::VFNMSUB213PSZr, X86::VFNMSUB213PSZmb, TB_BCAST_SS },
+ { X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128mb, TB_BCAST_SD },
+ { X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256mb, TB_BCAST_SD },
+ { X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZmb, TB_BCAST_SD },
+ { X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128mb, TB_BCAST_SS },
+ { X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256mb, TB_BCAST_SS },
+ { X86::VFNMSUB231PSZr, X86::VFNMSUB231PSZmb, TB_BCAST_SS },
+};
+
static const X86MemoryFoldTableEntry *
lookupFoldTableImpl(ArrayRef<X86MemoryFoldTableEntry> Table, unsigned RegOp) {
#ifndef NDEBUG
@@ -5356,6 +5467,12 @@ lookupFoldTableImpl(ArrayRef<X86MemoryFoldTableEntry> Table, unsigned RegOp) {
std::end(BroadcastFoldTable2)) ==
std::end(BroadcastFoldTable2) &&
"BroadcastFoldTable2 is not sorted and unique!");
+ assert(std::is_sorted(std::begin(BroadcastFoldTable3),
+ std::end(BroadcastFoldTable3)) &&
+ std::adjacent_find(std::begin(BroadcastFoldTable3),
+ std::end(BroadcastFoldTable3)) ==
+ std::end(BroadcastFoldTable3) &&
+ "BroadcastFoldTable3 is not sorted and unique!");
FoldTablesChecked.store(true, std::memory_order_relaxed);
}
#endif
@@ -5429,6 +5546,10 @@ struct X86MemUnfoldTable {
// Index 2, folded broadcast
addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD | TB_FOLDED_BCAST);
+ for (const X86MemoryFoldTableEntry &Entry : BroadcastFoldTable3)
+ // Index 3, folded broadcast
+ addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD | TB_FOLDED_BCAST);
+
// Sort the memory->reg unfold table.
array_pod_sort(Table.begin(), Table.end());
OpenPOWER on IntegriCloud