summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h2
-rw-r--r--llvm/lib/Target/X86/X86InstrFMA3Info.cpp355
-rw-r--r--llvm/lib/Target/X86/X86InstrFMA3Info.h20
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp6
4 files changed, 149 insertions, 234 deletions
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index a1f8c1b94bc..497e29fe628 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -580,7 +580,7 @@ namespace X86II {
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
// specified machine instruction.
//
- inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
+ inline uint8_t getBaseOpcodeFor(uint64_t TSFlags) {
return TSFlags >> X86II::OpcodeShift;
}
diff --git a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
index d107a9b46c1..663b13279cd 100644
--- a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
+++ b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
@@ -21,239 +21,148 @@
using namespace llvm;
-#define FMA3BASE(X132, X213, X231, Attrs) \
- { { X132, X213, X231 }, Attrs },
-
-#define FMA3RA(R132, R213, R231, Attrs) \
- FMA3BASE(X86::R132, X86::R213, X86::R231, Attrs)
-
-#define FMA3R(R132, R213, R231) \
- FMA3RA(R132, R213, R231, 0)
-
-#define FMA3MA(M132, M213, M231, Attrs) \
- FMA3BASE(X86::M132, X86::M213, X86::M231, Attrs)
-
-#define FMA3M(M132, M213, M231) \
- FMA3MA(M132, M213, M231, 0)
-
-#define FMA3RMA(R132, R213, R231, M132, M213, M231, Attrs) \
- FMA3RA(R132, R213, R231, Attrs) \
- FMA3MA(M132, M213, M231, Attrs)
-
-#define FMA3RM(R132, R213, R231, M132, M213, M231) \
- FMA3RMA(R132, R213, R231, M132, M213, M231, 0)
-
-#define FMA3_AVX2_VECTOR_GROUP(Name) \
- FMA3RM(Name##132PSr, Name##213PSr, Name##231PSr, \
- Name##132PSm, Name##213PSm, Name##231PSm) \
- FMA3RM(Name##132PDr, Name##213PDr, Name##231PDr, \
- Name##132PDm, Name##213PDm, Name##231PDm) \
- FMA3RM(Name##132PSYr, Name##213PSYr, Name##231PSYr, \
- Name##132PSYm, Name##213PSYm, Name##231PSYm) \
- FMA3RM(Name##132PDYr, Name##213PDYr, Name##231PDYr, \
- Name##132PDYm, Name##213PDYm, Name##231PDYm)
-
-#define FMA3_AVX2_SCALAR_GROUP(Name) \
- FMA3RM(Name##132SSr, Name##213SSr, Name##231SSr, \
- Name##132SSm, Name##213SSm, Name##231SSm) \
- FMA3RM(Name##132SDr, Name##213SDr, Name##231SDr, \
- Name##132SDm, Name##213SDm, Name##231SDm) \
- FMA3RMA(Name##132SSr_Int, Name##213SSr_Int, Name##231SSr_Int, \
- Name##132SSm_Int, Name##213SSm_Int, Name##231SSm_Int, \
- X86InstrFMA3Group::X86FMA3Intrinsic) \
- FMA3RMA(Name##132SDr_Int, Name##213SDr_Int, Name##231SDr_Int, \
- Name##132SDm_Int, Name##213SDm_Int, Name##231SDm_Int, \
- X86InstrFMA3Group::X86FMA3Intrinsic)
-
-#define FMA3_AVX2_FULL_GROUP(Name) \
- FMA3_AVX2_VECTOR_GROUP(Name) \
- FMA3_AVX2_SCALAR_GROUP(Name)
-
-#define FMA3_AVX512_VECTOR_GROUP(Name) \
- FMA3RM(Name##132PSZ128r, Name##213PSZ128r, Name##231PSZ128r, \
- Name##132PSZ128m, Name##213PSZ128m, Name##231PSZ128m) \
- FMA3RM(Name##132PDZ128r, Name##213PDZ128r, Name##231PDZ128r, \
- Name##132PDZ128m, Name##213PDZ128m, Name##231PDZ128m) \
- FMA3RM(Name##132PSZ256r, Name##213PSZ256r, Name##231PSZ256r, \
- Name##132PSZ256m, Name##213PSZ256m, Name##231PSZ256m) \
- FMA3RM(Name##132PDZ256r, Name##213PDZ256r, Name##231PDZ256r, \
- Name##132PDZ256m, Name##213PDZ256m, Name##231PDZ256m) \
- FMA3RM(Name##132PSZr, Name##213PSZr, Name##231PSZr, \
- Name##132PSZm, Name##213PSZm, Name##231PSZm) \
- FMA3RM(Name##132PDZr, Name##213PDZr, Name##231PDZr, \
- Name##132PDZm, Name##213PDZm, Name##231PDZm) \
- FMA3RMA(Name##132PSZ128rk, Name##213PSZ128rk, Name##231PSZ128rk, \
- Name##132PSZ128mk, Name##213PSZ128mk, Name##231PSZ128mk, \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3RMA(Name##132PDZ128rk, Name##213PDZ128rk, Name##231PDZ128rk, \
- Name##132PDZ128mk, Name##213PDZ128mk, Name##231PDZ128mk, \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3RMA(Name##132PSZ256rk, Name##213PSZ256rk, Name##231PSZ256rk, \
- Name##132PSZ256mk, Name##213PSZ256mk, Name##231PSZ256mk, \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3RMA(Name##132PDZ256rk, Name##213PDZ256rk, Name##231PDZ256rk, \
- Name##132PDZ256mk, Name##213PDZ256mk, Name##231PDZ256mk, \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3RMA(Name##132PSZrk, Name##213PSZrk, Name##231PSZrk, \
- Name##132PSZmk, Name##213PSZmk, Name##231PSZmk, \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3RMA(Name##132PDZrk, Name##213PDZrk, Name##231PDZrk, \
- Name##132PDZmk, Name##213PDZmk, Name##231PDZmk, \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3RMA(Name##132PSZ128rkz, Name##213PSZ128rkz, Name##231PSZ128rkz, \
- Name##132PSZ128mkz, Name##213PSZ128mkz, Name##231PSZ128mkz, \
- X86InstrFMA3Group::X86FMA3KZeroMasked) \
- FMA3RMA(Name##132PDZ128rkz, Name##213PDZ128rkz, Name##231PDZ128rkz, \
- Name##132PDZ128mkz, Name##213PDZ128mkz, Name##231PDZ128mkz, \
- X86InstrFMA3Group::X86FMA3KZeroMasked) \
- FMA3RMA(Name##132PSZ256rkz, Name##213PSZ256rkz, Name##231PSZ256rkz, \
- Name##132PSZ256mkz, Name##213PSZ256mkz, Name##231PSZ256mkz, \
- X86InstrFMA3Group::X86FMA3KZeroMasked) \
- FMA3RMA(Name##132PDZ256rkz, Name##213PDZ256rkz, Name##231PDZ256rkz, \
- Name##132PDZ256mkz, Name##213PDZ256mkz, Name##231PDZ256mkz, \
- X86InstrFMA3Group::X86FMA3KZeroMasked) \
- FMA3RMA(Name##132PSZrkz, Name##213PSZrkz, Name##231PSZrkz, \
- Name##132PSZmkz, Name##213PSZmkz, Name##231PSZmkz, \
- X86InstrFMA3Group::X86FMA3KZeroMasked) \
- FMA3RMA(Name##132PDZrkz, Name##213PDZrkz, Name##231PDZrkz, \
- Name##132PDZmkz, Name##213PDZmkz, Name##231PDZmkz, \
- X86InstrFMA3Group::X86FMA3KZeroMasked) \
- FMA3R(Name##132PSZrb, Name##213PSZrb, Name##231PSZrb) \
- FMA3R(Name##132PDZrb, Name##213PDZrb, Name##231PDZrb) \
- FMA3RA(Name##132PSZrbk, Name##213PSZrbk, Name##231PSZrbk, \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3RA(Name##132PDZrbk, Name##213PDZrbk, Name##231PDZrbk, \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3RA(Name##132PSZrbkz, Name##213PSZrbkz, Name##231PSZrbkz, \
- X86InstrFMA3Group::X86FMA3KZeroMasked) \
- FMA3RA(Name##132PDZrbkz, Name##213PDZrbkz, Name##231PDZrbkz, \
- X86InstrFMA3Group::X86FMA3KZeroMasked) \
- FMA3M(Name##132PSZ128mb, Name##213PSZ128mb, Name##231PSZ128mb) \
- FMA3M(Name##132PDZ128mb, Name##213PDZ128mb, Name##231PDZ128mb) \
- FMA3M(Name##132PSZ256mb, Name##213PSZ256mb, Name##231PSZ256mb) \
- FMA3M(Name##132PDZ256mb, Name##213PDZ256mb, Name##231PDZ256mb) \
- FMA3M(Name##132PSZmb, Name##213PSZmb, Name##231PSZmb) \
- FMA3M(Name##132PDZmb, Name##213PDZmb, Name##231PDZmb) \
- FMA3MA(Name##132PSZ128mbk, Name##213PSZ128mbk, Name##231PSZ128mbk, \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3MA(Name##132PDZ128mbk, Name##213PDZ128mbk, Name##231PDZ128mbk, \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3MA(Name##132PSZ256mbk, Name##213PSZ256mbk, Name##231PSZ256mbk, \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3MA(Name##132PDZ256mbk, Name##213PDZ256mbk, Name##231PDZ256mbk, \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3MA(Name##132PSZmbk, Name##213PSZmbk, Name##231PSZmbk, \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3MA(Name##132PDZmbk, Name##213PDZmbk, Name##231PDZmbk, \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3MA(Name##132PSZ128mbkz, Name##213PSZ128mbkz, Name##231PSZ128mbkz, \
- X86InstrFMA3Group::X86FMA3KZeroMasked) \
- FMA3MA(Name##132PDZ128mbkz, Name##213PDZ128mbkz, Name##231PDZ128mbkz, \
- X86InstrFMA3Group::X86FMA3KZeroMasked) \
- FMA3MA(Name##132PSZ256mbkz, Name##213PSZ256mbkz, Name##231PSZ256mbkz, \
- X86InstrFMA3Group::X86FMA3KZeroMasked) \
- FMA3MA(Name##132PDZ256mbkz, Name##213PDZ256mbkz, Name##231PDZ256mbkz, \
- X86InstrFMA3Group::X86FMA3KZeroMasked) \
- FMA3MA(Name##132PSZmbkz, Name##213PSZmbkz, Name##231PSZmbkz, \
- X86InstrFMA3Group::X86FMA3KZeroMasked) \
- FMA3MA(Name##132PDZmbkz, Name##213PDZmbkz, Name##231PDZmbkz, \
- X86InstrFMA3Group::X86FMA3KZeroMasked)
-
-#define FMA3_AVX512_SCALAR_GROUP(Name) \
- FMA3RM(Name##132SSZr, Name##213SSZr, Name##231SSZr, \
- Name##132SSZm, Name##213SSZm, Name##231SSZm) \
- FMA3RM(Name##132SDZr, Name##213SDZr, Name##231SDZr, \
- Name##132SDZm, Name##213SDZm, Name##231SDZm) \
- FMA3RMA(Name##132SSZr_Int, Name##213SSZr_Int, Name##231SSZr_Int, \
- Name##132SSZm_Int, Name##213SSZm_Int, Name##231SSZm_Int, \
- X86InstrFMA3Group::X86FMA3Intrinsic) \
- FMA3RMA(Name##132SDZr_Int, Name##213SDZr_Int, Name##231SDZr_Int, \
- Name##132SDZm_Int, Name##213SDZm_Int, Name##231SDZm_Int, \
- X86InstrFMA3Group::X86FMA3Intrinsic) \
- FMA3RMA(Name##132SSZr_Intk, Name##213SSZr_Intk, Name##231SSZr_Intk, \
- Name##132SSZm_Intk, Name##213SSZm_Intk, Name##231SSZm_Intk, \
- X86InstrFMA3Group::X86FMA3Intrinsic | \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3RMA(Name##132SDZr_Intk, Name##213SDZr_Intk, Name##231SDZr_Intk, \
- Name##132SDZm_Intk, Name##213SDZm_Intk, Name##231SDZm_Intk, \
- X86InstrFMA3Group::X86FMA3Intrinsic | \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3RMA(Name##132SSZr_Intkz, Name##213SSZr_Intkz, Name##231SSZr_Intkz, \
- Name##132SSZm_Intkz, Name##213SSZm_Intkz, Name##231SSZm_Intkz, \
- X86InstrFMA3Group::X86FMA3Intrinsic | \
- X86InstrFMA3Group::X86FMA3KZeroMasked) \
- FMA3RMA(Name##132SDZr_Intkz, Name##213SDZr_Intkz, Name##231SDZr_Intkz, \
- Name##132SDZm_Intkz, Name##213SDZm_Intkz, Name##231SDZm_Intkz, \
- X86InstrFMA3Group::X86FMA3Intrinsic | \
- X86InstrFMA3Group::X86FMA3KZeroMasked) \
- FMA3RA(Name##132SSZrb_Int, Name##213SSZrb_Int, Name##231SSZrb_Int, \
- X86InstrFMA3Group::X86FMA3Intrinsic) \
- FMA3RA(Name##132SDZrb_Int, Name##213SDZrb_Int, Name##231SDZrb_Int, \
- X86InstrFMA3Group::X86FMA3Intrinsic) \
- FMA3RA(Name##132SSZrb_Intk, Name##213SSZrb_Intk, Name##231SSZrb_Intk, \
- X86InstrFMA3Group::X86FMA3Intrinsic | \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3RA(Name##132SDZrb_Intk, Name##213SDZrb_Intk, Name##231SDZrb_Intk, \
- X86InstrFMA3Group::X86FMA3Intrinsic | \
- X86InstrFMA3Group::X86FMA3KMergeMasked) \
- FMA3RA(Name##132SSZrb_Intkz, Name##213SSZrb_Intkz, Name##231SSZrb_Intkz, \
- X86InstrFMA3Group::X86FMA3Intrinsic | \
- X86InstrFMA3Group::X86FMA3KZeroMasked) \
- FMA3RA(Name##132SDZrb_Intkz, Name##213SDZrb_Intkz, Name##231SDZrb_Intkz, \
- X86InstrFMA3Group::X86FMA3Intrinsic | \
- X86InstrFMA3Group::X86FMA3KZeroMasked)
-
-#define FMA3_AVX512_FULL_GROUP(Name) \
- FMA3_AVX512_VECTOR_GROUP(Name) \
- FMA3_AVX512_SCALAR_GROUP(Name)
+#define FMA3GROUP(Name, Suf, Attrs) \
+ { { X86::Name##132##Suf, X86::Name##213##Suf, X86::Name##231##Suf }, Attrs },
+
+#define FMA3GROUP_MASKED(Name, Suf, Attrs) \
+ FMA3GROUP(Name, Suf, Attrs) \
+ FMA3GROUP(Name, Suf##k, Attrs | X86InstrFMA3Group::KMergeMasked) \
+ FMA3GROUP(Name, Suf##kz, Attrs | X86InstrFMA3Group::KZeroMasked)
+
+#define FMA3GROUP_PACKED_WIDTHS(Name, Suf, Attrs) \
+ FMA3GROUP(Name, Suf##Ym, Attrs) \
+ FMA3GROUP(Name, Suf##Yr, Attrs) \
+ FMA3GROUP_MASKED(Name, Suf##Z128m, Attrs) \
+ FMA3GROUP_MASKED(Name, Suf##Z128r, Attrs) \
+ FMA3GROUP_MASKED(Name, Suf##Z256m, Attrs) \
+ FMA3GROUP_MASKED(Name, Suf##Z256r, Attrs) \
+ FMA3GROUP_MASKED(Name, Suf##Zm, Attrs) \
+ FMA3GROUP_MASKED(Name, Suf##Zr, Attrs) \
+ FMA3GROUP(Name, Suf##m, Attrs) \
+ FMA3GROUP(Name, Suf##r, Attrs)
+
+#define FMA3GROUP_PACKED(Name, Attrs) \
+ FMA3GROUP_PACKED_WIDTHS(Name, PD, Attrs) \
+ FMA3GROUP_PACKED_WIDTHS(Name, PS, Attrs)
+
+#define FMA3GROUP_SCALAR_WIDTHS(Name, Suf, Attrs) \
+ FMA3GROUP(Name, Suf##Zm, Attrs) \
+ FMA3GROUP_MASKED(Name, Suf##Zm_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
+ FMA3GROUP(Name, Suf##Zr, Attrs) \
+ FMA3GROUP_MASKED(Name, Suf##Zr_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
+ FMA3GROUP(Name, Suf##m, Attrs) \
+ FMA3GROUP(Name, Suf##m_Int, Attrs | X86InstrFMA3Group::Intrinsic) \
+ FMA3GROUP(Name, Suf##r, Attrs) \
+ FMA3GROUP(Name, Suf##r_Int, Attrs | X86InstrFMA3Group::Intrinsic)
+
+#define FMA3GROUP_SCALAR(Name, Attrs) \
+ FMA3GROUP_SCALAR_WIDTHS(Name, SD, Attrs) \
+ FMA3GROUP_SCALAR_WIDTHS(Name, SS, Attrs) \
+
+#define FMA3GROUP_FULL(Name, Attrs) \
+ FMA3GROUP_PACKED(Name, Attrs) \
+ FMA3GROUP_SCALAR(Name, Attrs)
static const X86InstrFMA3Group Groups[] = {
- FMA3_AVX2_FULL_GROUP(VFMADD)
- FMA3_AVX2_FULL_GROUP(VFMSUB)
- FMA3_AVX2_FULL_GROUP(VFNMADD)
- FMA3_AVX2_FULL_GROUP(VFNMSUB)
-
- FMA3_AVX2_VECTOR_GROUP(VFMADDSUB)
- FMA3_AVX2_VECTOR_GROUP(VFMSUBADD)
-
- FMA3_AVX512_FULL_GROUP(VFMADD)
- FMA3_AVX512_FULL_GROUP(VFMSUB)
- FMA3_AVX512_FULL_GROUP(VFNMADD)
- FMA3_AVX512_FULL_GROUP(VFNMSUB)
-
- FMA3_AVX512_VECTOR_GROUP(VFMADDSUB)
- FMA3_AVX512_VECTOR_GROUP(VFMSUBADD)
+ FMA3GROUP_FULL(VFMADD, 0)
+ FMA3GROUP_PACKED(VFMADDSUB, 0)
+ FMA3GROUP_FULL(VFMSUB, 0)
+ FMA3GROUP_PACKED(VFMSUBADD, 0)
+ FMA3GROUP_FULL(VFNMADD, 0)
+ FMA3GROUP_FULL(VFNMSUB, 0)
};
-namespace {
-
-struct X86InstrFMA3Info {
- /// A map that is used to find the group of FMA opcodes using any FMA opcode
- /// from the group.
- DenseMap<unsigned, const X86InstrFMA3Group *> OpcodeToGroup;
+#define FMA3GROUP_PACKED_AVX512_WIDTHS(Name, Type, Suf, Attrs) \
+ FMA3GROUP_MASKED(Name, Type##Z128##Suf, Attrs) \
+ FMA3GROUP_MASKED(Name, Type##Z256##Suf, Attrs) \
+ FMA3GROUP_MASKED(Name, Type##Z##Suf, Attrs)
+
+#define FMA3GROUP_PACKED_AVX512(Name, Suf, Attrs) \
+ FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PD, Suf, Attrs) \
+ FMA3GROUP_PACKED_AVX512_WIDTHS(Name, PS, Suf, Attrs)
+
+#define FMA3GROUP_PACKED_AVX512_ROUND(Name, Suf, Attrs) \
+ FMA3GROUP_MASKED(Name, PDZ##Suf, Attrs) \
+ FMA3GROUP_MASKED(Name, PSZ##Suf, Attrs)
+
+#define FMA3GROUP_SCALAR_AVX512(Name, Suf, Attrs) \
+ FMA3GROUP_MASKED(Name, SDZ##Suf, Attrs) \
+ FMA3GROUP_MASKED(Name, SSZ##Suf, Attrs)
+
+static const X86InstrFMA3Group BroadcastGroups[] = {
+ FMA3GROUP_PACKED_AVX512(VFMADD, mb, 0)
+ FMA3GROUP_PACKED_AVX512(VFMADDSUB, mb, 0)
+ FMA3GROUP_PACKED_AVX512(VFMSUB, mb, 0)
+ FMA3GROUP_PACKED_AVX512(VFMSUBADD, mb, 0)
+ FMA3GROUP_PACKED_AVX512(VFNMADD, mb, 0)
+ FMA3GROUP_PACKED_AVX512(VFNMSUB, mb, 0)
+};
- /// Constructor. Just creates an object of the class.
- X86InstrFMA3Info() {
- for (const X86InstrFMA3Group &G : Groups) {
- OpcodeToGroup[G.Opcodes[0]] = &G;
- OpcodeToGroup[G.Opcodes[1]] = &G;
- OpcodeToGroup[G.Opcodes[2]] = &G;
- }
- }
+static const X86InstrFMA3Group RoundGroups[] = {
+ FMA3GROUP_PACKED_AVX512_ROUND(VFMADD, rb, 0)
+ FMA3GROUP_SCALAR_AVX512(VFMADD, rb_Int, X86InstrFMA3Group::Intrinsic)
+ FMA3GROUP_PACKED_AVX512_ROUND(VFMADDSUB, rb, 0)
+ FMA3GROUP_PACKED_AVX512_ROUND(VFMSUB, rb, 0)
+ FMA3GROUP_SCALAR_AVX512(VFMSUB, rb_Int, X86InstrFMA3Group::Intrinsic)
+ FMA3GROUP_PACKED_AVX512_ROUND(VFMSUBADD, rb, 0)
+ FMA3GROUP_PACKED_AVX512_ROUND(VFNMADD, rb, 0)
+ FMA3GROUP_SCALAR_AVX512(VFNMADD, rb_Int, X86InstrFMA3Group::Intrinsic)
+ FMA3GROUP_PACKED_AVX512_ROUND(VFNMSUB, rb, 0)
+ FMA3GROUP_SCALAR_AVX512(VFNMSUB, rb_Int, X86InstrFMA3Group::Intrinsic)
};
+static void verifyTables() {
+#ifndef NDEBUG
+ static std::atomic<bool> TableChecked(false);
+ if (!TableChecked.load(std::memory_order_relaxed)) {
+ assert(std::is_sorted(std::begin(Groups), std::end(Groups)) &&
+ std::is_sorted(std::begin(RoundGroups), std::end(RoundGroups)) &&
+ std::is_sorted(std::begin(BroadcastGroups),
+ std::end(BroadcastGroups)) &&
+ "FMA3 tables not sorted!");
+ TableChecked.store(true, std::memory_order_relaxed);
+ }
+#endif
}
-static ManagedStatic<X86InstrFMA3Info> X86FMA3InfoObj;
-
/// Returns a reference to a group of FMA3 opcodes to where the given
/// \p Opcode is included. If the given \p Opcode is not recognized as FMA3
/// and not included into any FMA3 group, then nullptr is returned.
-const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode) {
- auto &Map = X86FMA3InfoObj->OpcodeToGroup;
- auto I = Map.find(Opcode);
- if (I != Map.end())
- return I->second;
-
- return nullptr;
+const X86InstrFMA3Group *llvm::getFMA3Group(unsigned Opcode, uint64_t TSFlags) {
+
+ // FMA3 instructions have a well defined encoding pattern we can exploit.
+ uint8_t BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
+ bool IsFMA3 = ((TSFlags & X86II::EncodingMask) == X86II::VEX ||
+ (TSFlags & X86II::EncodingMask) == X86II::EVEX) &&
+ (TSFlags & X86II::OpMapMask) == X86II::T8 &&
+ (TSFlags & X86II::OpPrefixMask) == X86II::PD &&
+ ((BaseOpcode >= 0x96 && BaseOpcode <= 0x9F) ||
+ (BaseOpcode >= 0xA6 && BaseOpcode <= 0xAF) ||
+ (BaseOpcode >= 0xB6 && BaseOpcode <= 0xBF));
+ if (!IsFMA3)
+ return nullptr;
+
+ verifyTables();
+
+ ArrayRef<X86InstrFMA3Group> Table;
+ if (TSFlags & X86II::EVEX_RC)
+ Table = makeArrayRef(RoundGroups);
+ else if (TSFlags & X86II::EVEX_B)
+ Table = makeArrayRef(BroadcastGroups);
+ else
+ Table = makeArrayRef(Groups);
+
+ // FMA 132 instructions have an opcode of 0x96-0x9F
+ // FMA 213 instructions have an opcode of 0xA6-0xAF
+ // FMA 231 instructions have an opcode of 0xB6-0xBF
+ unsigned FormIndex = ((BaseOpcode - 0x90) >> 4) & 0x3;
+
+ auto I = std::lower_bound(Table.begin(), Table.end(), Opcode,
+ [FormIndex](const X86InstrFMA3Group &Group,
+ unsigned Opcode) {
+ return Group.Opcodes[FormIndex] < Opcode;
+ });
+ assert(I != Table.end() && I->Opcodes[FormIndex] == Opcode &&
+ "Couldn't find FMA3 opcode!");
+ return I;
}
diff --git a/llvm/lib/Target/X86/X86InstrFMA3Info.h b/llvm/lib/Target/X86/X86InstrFMA3Info.h
index 6709a53072f..6eec1db98bf 100644
--- a/llvm/lib/Target/X86/X86InstrFMA3Info.h
+++ b/llvm/lib/Target/X86/X86InstrFMA3Info.h
@@ -43,17 +43,17 @@ struct X86InstrFMA3Group {
enum : uint16_t {
/// This bit must be set in the 'Attributes' field of FMA group if such
/// group of FMA opcodes consists of FMA intrinsic opcodes.
- X86FMA3Intrinsic = 0x1,
+ Intrinsic = 0x1,
/// This bit must be set in the 'Attributes' field of FMA group if such
/// group of FMA opcodes consists of AVX512 opcodes accepting a k-mask and
/// passing the elements from the 1st operand to the result of the operation
/// when the correpondings bits in the k-mask are unset.
- X86FMA3KMergeMasked = 0x2,
+ KMergeMasked = 0x2,
/// This bit must be set in the 'Attributes' field of FMA group if such
/// group of FMA opcodes consists of AVX512 opcodes accepting a k-zeromask.
- X86FMA3KZeroMasked = 0x4,
+ KZeroMasked = 0x4,
};
/// Returns the 132 form of FMA opcode.
@@ -72,26 +72,30 @@ struct X86InstrFMA3Group {
}
/// Returns true iff the group of FMA opcodes holds intrinsic opcodes.
- bool isIntrinsic() const { return (Attributes & X86FMA3Intrinsic) != 0; }
+ bool isIntrinsic() const { return (Attributes & Intrinsic) != 0; }
/// Returns true iff the group of FMA opcodes holds k-merge-masked opcodes.
bool isKMergeMasked() const {
- return (Attributes & X86FMA3KMergeMasked) != 0;
+ return (Attributes & KMergeMasked) != 0;
}
/// Returns true iff the group of FMA opcodes holds k-zero-masked opcodes.
- bool isKZeroMasked() const { return (Attributes & X86FMA3KZeroMasked) != 0; }
+ bool isKZeroMasked() const { return (Attributes &KZeroMasked) != 0; }
/// Returns true iff the group of FMA opcodes holds any of k-masked opcodes.
bool isKMasked() const {
- return (Attributes & (X86FMA3KMergeMasked | X86FMA3KZeroMasked)) != 0;
+ return (Attributes & (KMergeMasked | KZeroMasked)) != 0;
+ }
+
+ bool operator<(const X86InstrFMA3Group &RHS) const {
+ return Opcodes[0] < RHS.Opcodes[0];
}
};
/// Returns a reference to a group of FMA3 opcodes to where the given
/// \p Opcode is included. If the given \p Opcode is not recognized as FMA3
/// and not included into any FMA3 group, then nullptr is returned.
-const X86InstrFMA3Group *getFMA3Group(unsigned Opcode);
+const X86InstrFMA3Group *getFMA3Group(unsigned Opcode, uint64_t TSFlags);
} // end namespace llvm
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 2d057cf7df6..48f21d2abc8 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1803,7 +1803,8 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
OpIdx1, OpIdx2);
}
- const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode());
+ const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
+ MI.getDesc().TSFlags);
if (FMA3Group) {
unsigned Opc =
getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group);
@@ -2039,7 +2040,8 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
}
default:
- const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode());
+ const X86InstrFMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(),
+ MI.getDesc().TSFlags);
if (FMA3Group)
return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2,
FMA3Group->isIntrinsic());
OpenPOWER on IntegriCloud