summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorVyacheslav Klochkov <vyacheslav.n.klochkov@gmail.com>2016-08-11 22:07:33 +0000
committerVyacheslav Klochkov <vyacheslav.n.klochkov@gmail.com>2016-08-11 22:07:33 +0000
commit6daefcf6262299a4233231c49b9048c1062dbc90 (patch)
tree31e71c7a213b08252b475bd77a0e0d33a89885d4 /llvm/lib
parent2c3f501052dac7baa3d937d543e60662089145c7 (diff)
downloadbcm5719-llvm-6daefcf6262299a4233231c49b9048c1062dbc90.tar.gz
bcm5719-llvm-6daefcf6262299a4233231c49b9048c1062dbc90.zip
X86-FMA3: Implemented commute transformation for EVEX/AVX512 FMA3 opcodes.
This helped to improve memory-folding and register coalescing optimizations. Also, this patch fixed tracker issue #17229. Reviewer: Craig Topper. Differential Revision: https://reviews.llvm.org/D23108 llvm-svn: 278431
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/CMakeLists.txt1
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td57
-rw-r--r--llvm/lib/Target/X86/X86InstrFMA3Info.cpp284
-rw-r--r--llvm/lib/Target/X86/X86InstrFMA3Info.h315
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp640
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.h32
6 files changed, 767 insertions, 562 deletions
diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index 894090f7897..86792785302 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -24,6 +24,7 @@ set(sources
X86FrameLowering.cpp
X86ISelDAGToDAG.cpp
X86ISelLowering.cpp
+ X86InstrFMA3Info.cpp
X86InstrInfo.cpp
X86MCInstLower.cpp
X86MachineFunctionInfo.cpp
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 44b5bea1557..50791e9fe5e 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -194,7 +194,8 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
list<dag> ZeroMaskingPattern,
string MaskingConstraint = "",
InstrItinClass itin = NoItinerary,
- bit IsCommutable = 0> {
+ bit IsCommutable = 0,
+ bit IsKCommutable = 0> {
let isCommutable = IsCommutable in
def NAME: AVX512<O, F, Outs, Ins,
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
@@ -202,7 +203,7 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
Pattern, itin>;
// Prefer over VMOV*rrk Pat<>
- let AddedComplexity = 20 in
+ let AddedComplexity = 20, isCommutable = IsKCommutable in
def NAME#k: AVX512<O, F, Outs, MaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
"$dst {${mask}}, "#IntelSrcAsm#"}",
@@ -210,8 +211,11 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
EVEX_K {
// In case of the 3src subclass this is overridden with a let.
string Constraints = MaskingConstraint;
- }
- let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
+ }
+
+ // Zero mask does not add any restrictions to commute operands transformation.
+ // So, it is Ok to use IsCommutable instead of IsKCommutable.
+ let AddedComplexity = 30, isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, "#IntelSrcAsm#"}",
@@ -231,14 +235,16 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
SDNode Select = vselect,
string MaskingConstraint = "",
InstrItinClass itin = NoItinerary,
- bit IsCommutable = 0> :
+ bit IsCommutable = 0,
+ bit IsKCommutable = 0> :
AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.RC:$dst, RHS)],
[(set _.RC:$dst, MaskingRHS)],
[(set _.RC:$dst,
(Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
- MaskingConstraint, NoItinerary, IsCommutable>;
+ MaskingConstraint, NoItinerary, IsCommutable,
+ IsKCommutable>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
@@ -248,13 +254,14 @@ multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
string AttSrcAsm, string IntelSrcAsm,
dag RHS,
InstrItinClass itin = NoItinerary,
- bit IsCommutable = 0, SDNode Select = vselect> :
+ bit IsCommutable = 0, bit IsKCommutable = 0,
+ SDNode Select = vselect> :
AVX512_maskable_common<O, F, _, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(Select _.KRCWM:$mask, RHS, _.RC:$src0), Select,
- "$src0 = $dst", itin, IsCommutable>;
+ "$src0 = $dst", itin, IsCommutable, IsKCommutable>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
@@ -278,15 +285,17 @@ multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS> :
+ dag RHS, bit IsCommutable = 0,
+ bit IsKCommutable = 0> :
AVX512_maskable_common<O, F, _, Outs,
!con((ins _.RC:$src1), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
- (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>;
+ (vselect _.KRCWM:$mask, RHS, _.RC:$src1),
+ vselect, "", NoItinerary, IsCommutable, IsKCommutable>;
-// Similar to AVX512_maskable_3rc but in this case the input VT for the tied
+// Similar to AVX512_maskable_3src but in this case the input VT for the tied
// operand differs from the output VT. This requires a bitconvert on
// the preserved vector going into the vselect.
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
@@ -305,14 +314,16 @@ multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS> :
+ dag RHS, bit IsCommutable = 0,
+ bit IsKCommutable = 0> :
AVX512_maskable_common<O, F, _, Outs,
!con((ins _.RC:$src1), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(X86selects _.KRCWM:$mask, RHS, _.RC:$src1),
- X86selects>;
+ X86selects, "", NoItinerary, IsCommutable,
+ IsKCommutable>;
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins,
@@ -4842,13 +4853,13 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3))>,
+ (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
AVX512FMA3Base;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3)))>,
+ (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
AVX512FMA3Base;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -4856,7 +4867,7 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(OpNode _.RC:$src2,
- _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
+ _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
AVX512FMA3Base, EVEX_B;
}
@@ -4875,7 +4886,7 @@ multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
- (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc)))>,
+ (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
}
@@ -4917,13 +4928,13 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1))>,
+ (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
AVX512FMA3Base;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
+ (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
AVX512FMA3Base;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -4932,7 +4943,7 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src2, ${src3}"##_.BroadcastStr,
(_.VT (OpNode _.RC:$src2,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
- _.RC:$src1))>, AVX512FMA3Base, EVEX_B;
+ _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B;
}
// Additional patterns for folding broadcast nodes in other orders.
@@ -4960,7 +4971,7 @@ multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
- (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc)))>,
+ (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))), 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
}
@@ -6036,7 +6047,7 @@ multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
(X86cvtps2ph (_src.VT _src.RC:$src1),
(i32 imm:$src2),
(i32 FROUND_CURRENT)),
- NoItinerary, 0, X86select>, AVX512AIi8Base;
+ NoItinerary, 0, 0, X86select>, AVX512AIi8Base;
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -6056,7 +6067,7 @@ multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
(X86cvtps2ph (_src.VT _src.RC:$src1),
(i32 imm:$src2),
(i32 FROUND_NO_EXC)),
- NoItinerary, 0, X86select>, EVEX_B, AVX512AIi8Base;
+ NoItinerary, 0, 0, X86select>, EVEX_B, AVX512AIi8Base;
}
let Predicates = [HasAVX512] in {
defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem>,
diff --git a/llvm/lib/Target/X86/X86InstrFMA3Info.cpp b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
new file mode 100644
index 00000000000..7bd8415e9e4
--- /dev/null
+++ b/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
@@ -0,0 +1,284 @@
+//===-- X86InstrFMA3Info.cpp - X86 FMA3 Instruction Information -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the classes providing information
+// about existing X86 FMA3 opcodes, classifying and grouping them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstrFMA3Info.h"
+#include "X86InstrInfo.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Threading.h"
+
+/// This flag is used in the method llvm::call_once() used below to make the
+/// initialization of the map 'OpcodeToGroup' thread safe.
+LLVM_DEFINE_ONCE_FLAG(InitGroupsOnceFlag);
+
+static ManagedStatic<X86InstrFMA3Info> X86InstrFMA3InfoObj;
+X86InstrFMA3Info *X86InstrFMA3Info::getX86InstrFMA3Info() {
+ return &*X86InstrFMA3InfoObj;
+}
+
+void X86InstrFMA3Info::initRMGroup(const uint16_t *RegOpcodes,
+ const uint16_t *MemOpcodes, unsigned Attr) {
+ // Create a new instance of this class that would hold a group of FMA opcodes.
+ X86InstrFMA3Group *G = new X86InstrFMA3Group(RegOpcodes, MemOpcodes, Attr);
+
+  // Add the references from individual opcodes to the group holding them.
+ assert((!OpcodeToGroup[RegOpcodes[0]] && !OpcodeToGroup[RegOpcodes[1]] &&
+ !OpcodeToGroup[RegOpcodes[2]] && !OpcodeToGroup[MemOpcodes[0]] &&
+ !OpcodeToGroup[MemOpcodes[1]] && !OpcodeToGroup[MemOpcodes[2]]) &&
+ "Duplication or rewrite of elements in OpcodeToGroup.");
+ OpcodeToGroup[RegOpcodes[0]] = G;
+ OpcodeToGroup[RegOpcodes[1]] = G;
+ OpcodeToGroup[RegOpcodes[2]] = G;
+ OpcodeToGroup[MemOpcodes[0]] = G;
+ OpcodeToGroup[MemOpcodes[1]] = G;
+ OpcodeToGroup[MemOpcodes[2]] = G;
+}
+
+void X86InstrFMA3Info::initRGroup(const uint16_t *RegOpcodes, unsigned Attr) {
+ // Create a new instance of this class that would hold a group of FMA opcodes.
+ X86InstrFMA3Group *G = new X86InstrFMA3Group(RegOpcodes, nullptr, Attr);
+
+  // Add the references from individual opcodes to the group holding them.
+ assert((!OpcodeToGroup[RegOpcodes[0]] && !OpcodeToGroup[RegOpcodes[1]] &&
+ !OpcodeToGroup[RegOpcodes[2]]) &&
+ "Duplication or rewrite of elements in OpcodeToGroup.");
+ OpcodeToGroup[RegOpcodes[0]] = G;
+ OpcodeToGroup[RegOpcodes[1]] = G;
+ OpcodeToGroup[RegOpcodes[2]] = G;
+}
+
+void X86InstrFMA3Info::initMGroup(const uint16_t *MemOpcodes, unsigned Attr) {
+ // Create a new instance of this class that would hold a group of FMA opcodes.
+ X86InstrFMA3Group *G = new X86InstrFMA3Group(nullptr, MemOpcodes, Attr);
+
+  // Add the references from individual opcodes to the group holding them.
+ assert((!OpcodeToGroup[MemOpcodes[0]] && !OpcodeToGroup[MemOpcodes[1]] &&
+ !OpcodeToGroup[MemOpcodes[2]]) &&
+ "Duplication or rewrite of elements in OpcodeToGroup.");
+ OpcodeToGroup[MemOpcodes[0]] = G;
+ OpcodeToGroup[MemOpcodes[1]] = G;
+ OpcodeToGroup[MemOpcodes[2]] = G;
+}
+
+#define FMA3RM(R132, R213, R231, M132, M213, M231) \
+ static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \
+ static const uint16_t Mem##R132[3] = {X86::M132, X86::M213, X86::M231}; \
+ initRMGroup(Reg##R132, Mem##R132);
+
+#define FMA3RMA(R132, R213, R231, M132, M213, M231, Attrs) \
+ static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \
+ static const uint16_t Mem##R132[3] = {X86::M132, X86::M213, X86::M231}; \
+ initRMGroup(Reg##R132, Mem##R132, (Attrs));
+
+#define FMA3R(R132, R213, R231) \
+ static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \
+ initRGroup(Reg##R132);
+
+#define FMA3RA(R132, R213, R231, Attrs) \
+ static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \
+ initRGroup(Reg##R132, (Attrs));
+
+#define FMA3M(M132, M213, M231) \
+ static const uint16_t Mem##M132[3] = {X86::M132, X86::M213, X86::M231}; \
+ initMGroup(Mem##M132);
+
+#define FMA3MA(M132, M213, M231, Attrs) \
+ static const uint16_t Mem##M132[3] = {X86::M132, X86::M213, X86::M231}; \
+ initMGroup(Mem##M132, (Attrs));
+
+#define FMA3_AVX2_VECTOR_GROUP(Name) \
+ FMA3RM(Name##132PSr, Name##213PSr, Name##231PSr, \
+ Name##132PSm, Name##213PSm, Name##231PSm); \
+ FMA3RM(Name##132PDr, Name##213PDr, Name##231PDr, \
+ Name##132PDm, Name##213PDm, Name##231PDm); \
+ FMA3RM(Name##132PSYr, Name##213PSYr, Name##231PSYr, \
+ Name##132PSYm, Name##213PSYm, Name##231PSYm); \
+ FMA3RM(Name##132PDYr, Name##213PDYr, Name##231PDYr, \
+ Name##132PDYm, Name##213PDYm, Name##231PDYm);
+
+#define FMA3_AVX2_SCALAR_GROUP(Name) \
+ FMA3RM(Name##132SSr, Name##213SSr, Name##231SSr, \
+ Name##132SSm, Name##213SSm, Name##231SSm); \
+ FMA3RM(Name##132SDr, Name##213SDr, Name##231SDr, \
+ Name##132SDm, Name##213SDm, Name##231SDm); \
+ FMA3RMA(Name##132SSr_Int, Name##213SSr_Int, Name##231SSr_Int, \
+ Name##132SSm_Int, Name##213SSm_Int, Name##231SSm_Int, \
+ X86InstrFMA3Group::X86FMA3Intrinsic); \
+ FMA3RMA(Name##132SDr_Int, Name##213SDr_Int, Name##231SDr_Int, \
+ Name##132SDm_Int, Name##213SDm_Int, Name##231SDm_Int, \
+ X86InstrFMA3Group::X86FMA3Intrinsic);
+
+#define FMA3_AVX2_FULL_GROUP(Name) \
+ FMA3_AVX2_VECTOR_GROUP(Name); \
+ FMA3_AVX2_SCALAR_GROUP(Name);
+
+#define FMA3_AVX512_VECTOR_GROUP(Name) \
+ FMA3RM(Name##132PSZ128r, Name##213PSZ128r, Name##231PSZ128r, \
+ Name##132PSZ128m, Name##213PSZ128m, Name##231PSZ128m); \
+ FMA3RM(Name##132PDZ128r, Name##213PDZ128r, Name##231PDZ128r, \
+ Name##132PDZ128m, Name##213PDZ128m, Name##231PDZ128m); \
+ FMA3RM(Name##132PSZ256r, Name##213PSZ256r, Name##231PSZ256r, \
+ Name##132PSZ256m, Name##213PSZ256m, Name##231PSZ256m); \
+ FMA3RM(Name##132PDZ256r, Name##213PDZ256r, Name##231PDZ256r, \
+ Name##132PDZ256m, Name##213PDZ256m, Name##231PDZ256m); \
+ FMA3RM(Name##132PSZr, Name##213PSZr, Name##231PSZr, \
+ Name##132PSZm, Name##213PSZm, Name##231PSZm); \
+ FMA3RM(Name##132PDZr, Name##213PDZr, Name##231PDZr, \
+ Name##132PDZm, Name##213PDZm, Name##231PDZm); \
+ FMA3RMA(Name##132PSZ128rk, Name##213PSZ128rk, Name##231PSZ128rk, \
+ Name##132PSZ128mk, Name##213PSZ128mk, Name##231PSZ128mk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RMA(Name##132PDZ128rk, Name##213PDZ128rk, Name##231PDZ128rk, \
+ Name##132PDZ128mk, Name##213PDZ128mk, Name##231PDZ128mk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RMA(Name##132PSZ256rk, Name##213PSZ256rk, Name##231PSZ256rk, \
+ Name##132PSZ256mk, Name##213PSZ256mk, Name##231PSZ256mk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RMA(Name##132PDZ256rk, Name##213PDZ256rk, Name##231PDZ256rk, \
+ Name##132PDZ256mk, Name##213PDZ256mk, Name##231PDZ256mk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RMA(Name##132PSZrk, Name##213PSZrk, Name##231PSZrk, \
+ Name##132PSZmk, Name##213PSZmk, Name##231PSZmk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RMA(Name##132PDZrk, Name##213PDZrk, Name##231PDZrk, \
+ Name##132PDZmk, Name##213PDZmk, Name##231PDZmk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RMA(Name##132PSZ128rkz, Name##213PSZ128rkz, Name##231PSZ128rkz, \
+ Name##132PSZ128mkz, Name##213PSZ128mkz, Name##231PSZ128mkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RMA(Name##132PDZ128rkz, Name##213PDZ128rkz, Name##231PDZ128rkz, \
+ Name##132PDZ128mkz, Name##213PDZ128mkz, Name##231PDZ128mkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RMA(Name##132PSZ256rkz, Name##213PSZ256rkz, Name##231PSZ256rkz, \
+ Name##132PSZ256mkz, Name##213PSZ256mkz, Name##231PSZ256mkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RMA(Name##132PDZ256rkz, Name##213PDZ256rkz, Name##231PDZ256rkz, \
+ Name##132PDZ256mkz, Name##213PDZ256mkz, Name##231PDZ256mkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RMA(Name##132PSZrkz, Name##213PSZrkz, Name##231PSZrkz, \
+ Name##132PSZmkz, Name##213PSZmkz, Name##231PSZmkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RMA(Name##132PDZrkz, Name##213PDZrkz, Name##231PDZrkz, \
+ Name##132PDZmkz, Name##213PDZmkz, Name##231PDZmkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3R(Name##132PSZrb, Name##213PSZrb, Name##231PSZrb); \
+ FMA3R(Name##132PDZrb, Name##213PDZrb, Name##231PDZrb); \
+ FMA3RA(Name##132PSZrbk, Name##213PSZrbk, Name##231PSZrbk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RA(Name##132PDZrbk, Name##213PDZrbk, Name##231PDZrbk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RA(Name##132PSZrbkz, Name##213PSZrbkz, Name##231PSZrbkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RA(Name##132PDZrbkz, Name##213PDZrbkz, Name##231PDZrbkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3M(Name##132PSZ128mb, Name##213PSZ128mb, Name##231PSZ128mb); \
+ FMA3M(Name##132PDZ128mb, Name##213PDZ128mb, Name##231PDZ128mb); \
+ FMA3M(Name##132PSZ256mb, Name##213PSZ256mb, Name##231PSZ256mb); \
+ FMA3M(Name##132PDZ256mb, Name##213PDZ256mb, Name##231PDZ256mb); \
+ FMA3M(Name##132PSZmb, Name##213PSZmb, Name##231PSZmb); \
+ FMA3M(Name##132PDZmb, Name##213PDZmb, Name##231PDZmb); \
+ FMA3MA(Name##132PSZ128mbk, Name##213PSZ128mbk, Name##231PSZ128mbk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3MA(Name##132PDZ128mbk, Name##213PDZ128mbk, Name##231PDZ128mbk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3MA(Name##132PSZ256mbk, Name##213PSZ256mbk, Name##231PSZ256mbk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3MA(Name##132PDZ256mbk, Name##213PDZ256mbk, Name##231PDZ256mbk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3MA(Name##132PSZmbk, Name##213PSZmbk, Name##231PSZmbk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3MA(Name##132PDZmbk, Name##213PDZmbk, Name##231PDZmbk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3MA(Name##132PSZ128mbkz, Name##213PSZ128mbkz, Name##231PSZ128mbkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3MA(Name##132PDZ128mbkz, Name##213PDZ128mbkz, Name##231PDZ128mbkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3MA(Name##132PSZ256mbkz, Name##213PSZ256mbkz, Name##231PSZ256mbkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3MA(Name##132PDZ256mbkz, Name##213PDZ256mbkz, Name##231PDZ256mbkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3MA(Name##132PSZmbkz, Name##213PSZmbkz, Name##231PSZmbkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3MA(Name##132PDZmbkz, Name##213PDZmbkz, Name##231PDZmbkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked);
+
+#define FMA3_AVX512_SCALAR_GROUP(Name) \
+ FMA3RM(Name##132SSZr, Name##213SSZr, Name##231SSZr, \
+ Name##132SSZm, Name##213SSZm, Name##231SSZm); \
+ FMA3RM(Name##132SDZr, Name##213SDZr, Name##231SDZr, \
+ Name##132SDZm, Name##213SDZm, Name##231SDZm); \
+ FMA3RMA(Name##132SSZr_Int, Name##213SSZr_Int, Name##231SSZr_Int, \
+ Name##132SSZm_Int, Name##213SSZm_Int, Name##231SSZm_Int, \
+ X86InstrFMA3Group::X86FMA3Intrinsic); \
+ FMA3RMA(Name##132SDZr_Int, Name##213SDZr_Int, Name##231SDZr_Int, \
+ Name##132SDZm_Int, Name##213SDZm_Int, Name##231SDZm_Int, \
+ X86InstrFMA3Group::X86FMA3Intrinsic); \
+ FMA3RMA(Name##132SSZr_Intk, Name##213SSZr_Intk, Name##231SSZr_Intk, \
+ Name##132SSZm_Intk, Name##213SSZm_Intk, Name##231SSZm_Intk, \
+ X86InstrFMA3Group::X86FMA3Intrinsic | \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RMA(Name##132SDZr_Intk, Name##213SDZr_Intk, Name##231SDZr_Intk, \
+ Name##132SDZm_Intk, Name##213SDZm_Intk, Name##231SDZm_Intk, \
+ X86InstrFMA3Group::X86FMA3Intrinsic | \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RMA(Name##132SSZr_Intkz, Name##213SSZr_Intkz, Name##231SSZr_Intkz, \
+ Name##132SSZm_Intkz, Name##213SSZm_Intkz, Name##231SSZm_Intkz, \
+ X86InstrFMA3Group::X86FMA3Intrinsic | \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RMA(Name##132SDZr_Intkz, Name##213SDZr_Intkz, Name##231SDZr_Intkz, \
+ Name##132SDZm_Intkz, Name##213SDZm_Intkz, Name##231SDZm_Intkz, \
+ X86InstrFMA3Group::X86FMA3Intrinsic | \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RA(Name##132SSZrb_Int, Name##213SSZrb_Int, Name##231SSZrb_Int, \
+ X86InstrFMA3Group::X86FMA3Intrinsic); \
+ FMA3RA(Name##132SDZrb_Int, Name##213SDZrb_Int, Name##231SDZrb_Int, \
+ X86InstrFMA3Group::X86FMA3Intrinsic); \
+ FMA3RA(Name##132SSZrb_Intk, Name##213SSZrb_Intk, Name##231SSZrb_Intk, \
+ X86InstrFMA3Group::X86FMA3Intrinsic | \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RA(Name##132SDZrb_Intk, Name##213SDZrb_Intk, Name##231SDZrb_Intk, \
+ X86InstrFMA3Group::X86FMA3Intrinsic | \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RA(Name##132SSZrb_Intkz, Name##213SSZrb_Intkz, Name##231SSZrb_Intkz, \
+ X86InstrFMA3Group::X86FMA3Intrinsic | \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RA(Name##132SDZrb_Intkz, Name##213SDZrb_Intkz, Name##231SDZrb_Intkz, \
+ X86InstrFMA3Group::X86FMA3Intrinsic | \
+ X86InstrFMA3Group::X86FMA3KZeroMasked);
+
+#define FMA3_AVX512_FULL_GROUP(Name) \
+ FMA3_AVX512_VECTOR_GROUP(Name); \
+ FMA3_AVX512_SCALAR_GROUP(Name);
+
+void X86InstrFMA3Info::initGroupsOnceImpl() {
+ FMA3_AVX2_FULL_GROUP(VFMADD);
+ FMA3_AVX2_FULL_GROUP(VFMSUB);
+ FMA3_AVX2_FULL_GROUP(VFNMADD);
+ FMA3_AVX2_FULL_GROUP(VFNMSUB);
+
+ FMA3_AVX2_VECTOR_GROUP(VFMADDSUB);
+ FMA3_AVX2_VECTOR_GROUP(VFMSUBADD);
+
+ FMA3_AVX512_FULL_GROUP(VFMADD);
+ FMA3_AVX512_FULL_GROUP(VFMSUB);
+ FMA3_AVX512_FULL_GROUP(VFNMADD);
+ FMA3_AVX512_FULL_GROUP(VFNMSUB);
+
+ FMA3_AVX512_VECTOR_GROUP(VFMADDSUB);
+ FMA3_AVX512_VECTOR_GROUP(VFMSUBADD);
+}
+
+void X86InstrFMA3Info::initGroupsOnce() {
+ llvm::call_once(InitGroupsOnceFlag,
+ []() { getX86InstrFMA3Info()->initGroupsOnceImpl(); });
+}
diff --git a/llvm/lib/Target/X86/X86InstrFMA3Info.h b/llvm/lib/Target/X86/X86InstrFMA3Info.h
new file mode 100644
index 00000000000..987ff9e30e5
--- /dev/null
+++ b/llvm/lib/Target/X86/X86InstrFMA3Info.h
@@ -0,0 +1,315 @@
+//===-- X86InstrFMA3Info.h - X86 FMA3 Instruction Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the classes providing information
+// about existing X86 FMA3 opcodes, classifying and grouping them.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_UTILS_X86INSTRFMA3INFO_H
+#define LLVM_LIB_TARGET_X86_UTILS_X86INSTRFMA3INFO_H
+
+#include "X86.h"
+#include "llvm/ADT/DenseMap.h"
+#include <cassert>
+#include <set>
+
+using namespace llvm;
+
+/// This class is used to group {132, 213, 231} forms of FMA opcodes together.
+/// Each of the groups has either 3 register opcodes, 3 memory opcodes,
+/// or 6 register and memory opcodes. Also, each group has an attributes field
+/// describing it.
+class X86InstrFMA3Group {
+private:
+ /// Reference to an array holding 3 forms of register FMA opcodes.
+ /// It may be set to nullptr if the group of FMA opcodes does not have
+ /// any register form opcodes.
+ const uint16_t *RegOpcodes;
+
+ /// Reference to an array holding 3 forms of memory FMA opcodes.
+ /// It may be set to nullptr if the group of FMA opcodes does not have
+  /// any memory form opcodes.
+ const uint16_t *MemOpcodes;
+
+ /// This bitfield specifies the attributes associated with the created
+ /// FMA groups of opcodes.
+ unsigned Attributes;
+
+ static const unsigned Form132 = 0;
+ static const unsigned Form213 = 1;
+ static const unsigned Form231 = 2;
+
+public:
+ /// This bit must be set in the 'Attributes' field of FMA group if such
+ /// group of FMA opcodes consists of FMA intrinsic opcodes.
+ static const unsigned X86FMA3Intrinsic = 0x1;
+
+ /// This bit must be set in the 'Attributes' field of FMA group if such
+ /// group of FMA opcodes consists of AVX512 opcodes accepting a k-mask and
+ /// passing the elements from the 1st operand to the result of the operation
+  /// when the corresponding bits in the k-mask are unset.
+ static const unsigned X86FMA3KMergeMasked = 0x2;
+
+ /// This bit must be set in the 'Attributes' field of FMA group if such
+ /// group of FMA opcodes consists of AVX512 opcodes accepting a k-zeromask.
+ static const unsigned X86FMA3KZeroMasked = 0x4;
+
+ /// Constructor. Creates a new group of FMA opcodes with three register form
+ /// FMA opcodes \p RegOpcodes and three memory form FMA opcodes \p MemOpcodes.
+ /// The parameters \p RegOpcodes and \p MemOpcodes may be set to nullptr,
+ /// which means that the created group of FMA opcodes does not have the
+ /// corresponding (register or memory) opcodes.
+ /// The parameter \p Attr specifies the attributes describing the created
+ /// group.
+ X86InstrFMA3Group(const uint16_t *RegOpcodes, const uint16_t *MemOpcodes,
+ unsigned Attr)
+ : RegOpcodes(RegOpcodes), MemOpcodes(MemOpcodes), Attributes(Attr) {
+ assert((RegOpcodes || MemOpcodes) &&
+ "Cannot create a group not having any opcodes.");
+ }
+
+ /// Returns a memory form opcode that is the equivalent of the given register
+ /// form opcode \p RegOpcode. 0 is returned if the group does not have
+  /// either register or memory opcodes.
+ unsigned getMemOpcode(unsigned RegOpcode) const {
+ if (!RegOpcodes || !MemOpcodes)
+ return 0;
+ for (unsigned Form = 0; Form < 3; Form++)
+ if (RegOpcodes[Form] == RegOpcode)
+ return MemOpcodes[Form];
+ return 0;
+ }
+
+ /// Returns the 132 form of FMA register opcode.
+ unsigned getReg132Opcode() const {
+ assert(RegOpcodes && "The group does not have register opcodes.");
+ return RegOpcodes[Form132];
+ }
+
+ /// Returns the 213 form of FMA register opcode.
+ unsigned getReg213Opcode() const {
+ assert(RegOpcodes && "The group does not have register opcodes.");
+ return RegOpcodes[Form213];
+ }
+
+ /// Returns the 231 form of FMA register opcode.
+ unsigned getReg231Opcode() const {
+ assert(RegOpcodes && "The group does not have register opcodes.");
+ return RegOpcodes[Form231];
+ }
+
+ /// Returns the 132 form of FMA memory opcode.
+ unsigned getMem132Opcode() const {
+ assert(MemOpcodes && "The group does not have memory opcodes.");
+ return MemOpcodes[Form132];
+ }
+
+ /// Returns the 213 form of FMA memory opcode.
+ unsigned getMem213Opcode() const {
+ assert(MemOpcodes && "The group does not have memory opcodes.");
+ return MemOpcodes[Form213];
+ }
+
+ /// Returns the 231 form of FMA memory opcode.
+ unsigned getMem231Opcode() const {
+ assert(MemOpcodes && "The group does not have memory opcodes.");
+ return MemOpcodes[Form231];
+ }
+
+ /// Returns true iff the group of FMA opcodes holds intrinsic opcodes.
+ bool isIntrinsic() const { return (Attributes & X86FMA3Intrinsic) != 0; }
+
+ /// Returns true iff the group of FMA opcodes holds k-merge-masked opcodes.
+ bool isKMergeMasked() const {
+ return (Attributes & X86FMA3KMergeMasked) != 0;
+ }
+
+ /// Returns true iff the group of FMA opcodes holds k-zero-masked opcodes.
+ bool isKZeroMasked() const { return (Attributes & X86FMA3KZeroMasked) != 0; }
+
+ /// Returns true iff the group of FMA opcodes holds any of k-masked opcodes.
+ bool isKMasked() const {
+ return (Attributes & (X86FMA3KMergeMasked | X86FMA3KZeroMasked)) != 0;
+ }
+
+ /// Returns true iff the given \p Opcode is a register opcode from the
+ /// groups of FMA opcodes.
+ bool isRegOpcodeFromGroup(unsigned Opcode) const {
+ if (!RegOpcodes)
+ return false;
+ for (unsigned Form = 0; Form < 3; Form++)
+ if (Opcode == RegOpcodes[Form])
+ return true;
+ return false;
+ }
+
+ /// Returns true iff the given \p Opcode is a memory opcode from the
+ /// groups of FMA opcodes.
+ bool isMemOpcodeFromGroup(unsigned Opcode) const {
+ if (!MemOpcodes)
+ return false;
+ for (unsigned Form = 0; Form < 3; Form++)
+ if (Opcode == MemOpcodes[Form])
+ return true;
+ return false;
+ }
+};
+
+/// This class provides information about all existing FMA3 opcodes
+///
+class X86InstrFMA3Info {
+private:
+ /// A map that is used to find the group of FMA opcodes using any FMA opcode
+ /// from the group.
+ DenseMap<unsigned, const X86InstrFMA3Group *> OpcodeToGroup;
+
+ /// Creates groups of FMA opcodes and initializes Opcode-to-Group map.
+ /// This method can be called many times, but the actual initialization is
+ /// called only once.
+ static void initGroupsOnce();
+
+ /// Creates groups of FMA opcodes and initializes Opcode-to-Group map.
+ /// This method must be called ONLY from initGroupsOnce(). Otherwise, such
+ /// call is not thread safe.
+ void initGroupsOnceImpl();
+
+ /// Creates one group of FMA opcodes having the register opcodes
+ /// \p RegOpcodes and memory opcodes \p MemOpcodes. The parameter \p Attr
+ /// specifies the attributes describing the created group.
+ void initRMGroup(const uint16_t *RegOpcodes,
+ const uint16_t *MemOpcodes, unsigned Attr = 0);
+
+ /// Creates one group of FMA opcodes having only the register opcodes
+ /// \p RegOpcodes. The parameter \p Attr specifies the attributes describing
+ /// the created group.
+ void initRGroup(const uint16_t *RegOpcodes, unsigned Attr = 0);
+
+ /// Creates one group of FMA opcodes having only the memory opcodes
+ /// \p MemOpcodes. The parameter \p Attr specifies the attributes describing
+ /// the created group.
+ void initMGroup(const uint16_t *MemOpcodes, unsigned Attr = 0);
+
+public:
+ /// Returns the reference to an object of this class. It is assumed that
+ /// only one object may exist.
+ static X86InstrFMA3Info *getX86InstrFMA3Info();
+
+ /// Constructor. Just creates an object of the class.
+ X86InstrFMA3Info() {}
+
+ /// Destructor. Deallocates the memory used for FMA3 Groups.
+ ~X86InstrFMA3Info() {
+ std::set<const X86InstrFMA3Group *> DeletedGroups;
+ auto E = OpcodeToGroup.end();
+ for (auto I = OpcodeToGroup.begin(); I != E; I++) {
+ const X86InstrFMA3Group *G = I->second;
+ if (DeletedGroups.find(G) == DeletedGroups.end()) {
+ DeletedGroups.insert(G);
+ delete G;
+ }
+ }
+ }
+
+ /// Returns a reference to a group of FMA3 opcodes to where the given
+ /// \p Opcode is included. If the given \p Opcode is not recognized as FMA3
+ /// and not included into any FMA3 group, then nullptr is returned.
+ static const X86InstrFMA3Group *getFMA3Group(unsigned Opcode) {
+ // Ensure that the groups of opcodes are initialized.
+ initGroupsOnce();
+
+ // Find the group including the given opcode.
+ const X86InstrFMA3Info *FMA3Info = getX86InstrFMA3Info();
+ auto I = FMA3Info->OpcodeToGroup.find(Opcode);
+ if (I == FMA3Info->OpcodeToGroup.end())
+ return nullptr;
+
+ return I->second;
+ }
+
+ /// Returns true iff the given \p Opcode is recognized as FMA3 by this class.
+ static bool isFMA3(unsigned Opcode) {
+ return getFMA3Group(Opcode) != nullptr;
+ }
+
+ /// Iterator that is used to walk on FMA register opcodes having memory
+ /// form equivalents.
+ class rm_iterator {
+ private:
+ /// Iterator associated with the OpcodeToGroup map. It must always be
+ /// initialized with an entry from OpcodeToGroup for which I->first
+ /// points to a register FMA opcode and I->second points to a group of
+ /// FMA opcodes having memory form equivalent of I->first.
+ DenseMap<unsigned, const X86InstrFMA3Group *>::const_iterator I;
+
+ public:
+ /// Constructor. Creates rm_iterator. The parameter \p I must be an
+ /// iterator to OpcodeToGroup map entry having I->first pointing to
+ /// register form FMA opcode and I->second pointing to a group of FMA
+ /// opcodes holding memory form equivalent for I->first.
+ rm_iterator(DenseMap<unsigned, const X86InstrFMA3Group *>::const_iterator I)
+ : I(I) {}
+
+ /// Returns the register form FMA opcode.
+ unsigned getRegOpcode() const { return I->first; };
+
+ /// Returns the memory form equivalent opcode for FMA register opcode
+ /// referenced by I->first.
+ unsigned getMemOpcode() const {
+ unsigned Opcode = I->first;
+ const X86InstrFMA3Group *Group = I->second;
+ return Group->getMemOpcode(Opcode);
+ }
+
+ /// Returns a reference to a group of FMA opcodes.
+ const X86InstrFMA3Group *getGroup() const { return I->second; }
+
+ bool operator==(const rm_iterator &OtherIt) const { return I == OtherIt.I; }
+ bool operator!=(const rm_iterator &OtherIt) const { return I != OtherIt.I; }
+
+ /// Increment. Advances the 'I' iterator to the next OpcodeToGroup entry
+ /// having I->first pointing to register form FMA and I->second pointing
+ /// to a group of FMA opcodes holding memory form equivalent for I->first.
+ rm_iterator &operator++() {
+ auto E = getX86InstrFMA3Info()->OpcodeToGroup.end();
+ for (++I; I != E; ++I) {
+ unsigned RegOpcode = I->first;
+ const X86InstrFMA3Group *Group = I->second;
+ if (Group->getMemOpcode(RegOpcode) != 0)
+ break;
+ }
+ return *this;
+ }
+ };
+
+ /// Returns rm_iterator pointing to the first entry of OpcodeToGroup map
+ /// with a register FMA opcode having memory form opcode equivalent.
+ static rm_iterator rm_begin() {
+ initGroupsOnce();
+ const X86InstrFMA3Info *FMA3Info = getX86InstrFMA3Info();
+ auto I = FMA3Info->OpcodeToGroup.begin();
+ auto E = FMA3Info->OpcodeToGroup.end();
+ while (I != E) {
+ unsigned Opcode = I->first;
+ const X86InstrFMA3Group *G = I->second;
+ if (G->getMemOpcode(Opcode) != 0)
+ break;
+ I++;
+ }
+ return rm_iterator(I);
+ }
+
+ /// Returns the last rm_iterator.
+ static rm_iterator rm_end() {
+ initGroupsOnce();
+ return rm_iterator(getX86InstrFMA3Info()->OpcodeToGroup.end());
+ }
+};
+
+#endif
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 9a83c09dfdb..9df179d566c 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1855,281 +1855,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
}
static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
- // FMA foldable instructions
- { X86::VFMADD231SSr, X86::VFMADD231SSm, TB_ALIGN_NONE },
- { X86::VFMADD231SSr_Int, X86::VFMADD231SSm_Int, TB_ALIGN_NONE },
- { X86::VFMADD231SDr, X86::VFMADD231SDm, TB_ALIGN_NONE },
- { X86::VFMADD231SDr_Int, X86::VFMADD231SDm_Int, TB_ALIGN_NONE },
- { X86::VFMADD132SSr, X86::VFMADD132SSm, TB_ALIGN_NONE },
- { X86::VFMADD132SSr_Int, X86::VFMADD132SSm_Int, TB_ALIGN_NONE },
- { X86::VFMADD132SDr, X86::VFMADD132SDm, TB_ALIGN_NONE },
- { X86::VFMADD132SDr_Int, X86::VFMADD132SDm_Int, TB_ALIGN_NONE },
- { X86::VFMADD213SSr, X86::VFMADD213SSm, TB_ALIGN_NONE },
- { X86::VFMADD213SSr_Int, X86::VFMADD213SSm_Int, TB_ALIGN_NONE },
- { X86::VFMADD213SDr, X86::VFMADD213SDm, TB_ALIGN_NONE },
- { X86::VFMADD213SDr_Int, X86::VFMADD213SDm_Int, TB_ALIGN_NONE },
- { X86::VFMADD231SSZr, X86::VFMADD231SSZm, TB_ALIGN_NONE },
- { X86::VFMADD231SSZr_Int, X86::VFMADD231SSZm_Int, TB_ALIGN_NONE },
- { X86::VFMADD231SDZr, X86::VFMADD231SDZm, TB_ALIGN_NONE },
- { X86::VFMADD231SDZr_Int, X86::VFMADD231SDZm_Int, TB_ALIGN_NONE },
- { X86::VFMADD132SSZr, X86::VFMADD132SSZm, TB_ALIGN_NONE },
- { X86::VFMADD132SSZr_Int, X86::VFMADD132SSZm_Int, TB_ALIGN_NONE },
- { X86::VFMADD132SDZr, X86::VFMADD132SDZm, TB_ALIGN_NONE },
- { X86::VFMADD132SDZr_Int, X86::VFMADD132SDZm_Int, TB_ALIGN_NONE },
- { X86::VFMADD213SSZr, X86::VFMADD213SSZm, TB_ALIGN_NONE },
- { X86::VFMADD213SSZr_Int, X86::VFMADD213SSZm_Int, TB_ALIGN_NONE },
- { X86::VFMADD213SDZr, X86::VFMADD213SDZm, TB_ALIGN_NONE },
- { X86::VFMADD213SDZr_Int, X86::VFMADD213SDZm_Int, TB_ALIGN_NONE },
-
- { X86::VFMADD231PSr, X86::VFMADD231PSm, TB_ALIGN_NONE },
- { X86::VFMADD231PDr, X86::VFMADD231PDm, TB_ALIGN_NONE },
- { X86::VFMADD132PSr, X86::VFMADD132PSm, TB_ALIGN_NONE },
- { X86::VFMADD132PDr, X86::VFMADD132PDm, TB_ALIGN_NONE },
- { X86::VFMADD213PSr, X86::VFMADD213PSm, TB_ALIGN_NONE },
- { X86::VFMADD213PDr, X86::VFMADD213PDm, TB_ALIGN_NONE },
- { X86::VFMADD231PSYr, X86::VFMADD231PSYm, TB_ALIGN_NONE },
- { X86::VFMADD231PDYr, X86::VFMADD231PDYm, TB_ALIGN_NONE },
- { X86::VFMADD132PSYr, X86::VFMADD132PSYm, TB_ALIGN_NONE },
- { X86::VFMADD132PDYr, X86::VFMADD132PDYm, TB_ALIGN_NONE },
- { X86::VFMADD213PSYr, X86::VFMADD213PSYm, TB_ALIGN_NONE },
- { X86::VFMADD213PDYr, X86::VFMADD213PDYm, TB_ALIGN_NONE },
- { X86::VFMADD231PSZr, X86::VFMADD231PSZm, TB_ALIGN_NONE },
- { X86::VFMADD231PDZr, X86::VFMADD231PDZm, TB_ALIGN_NONE },
- { X86::VFMADD132PSZr, X86::VFMADD132PSZm, TB_ALIGN_NONE },
- { X86::VFMADD132PDZr, X86::VFMADD132PDZm, TB_ALIGN_NONE },
- { X86::VFMADD213PSZr, X86::VFMADD213PSZm, TB_ALIGN_NONE },
- { X86::VFMADD213PDZr, X86::VFMADD213PDZm, TB_ALIGN_NONE },
- { X86::VFMADD231PSZ128r, X86::VFMADD231PSZ128m, TB_ALIGN_NONE },
- { X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128m, TB_ALIGN_NONE },
- { X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128m, TB_ALIGN_NONE },
- { X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128m, TB_ALIGN_NONE },
- { X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128m, TB_ALIGN_NONE },
- { X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128m, TB_ALIGN_NONE },
- { X86::VFMADD231PSZ256r, X86::VFMADD231PSZ256m, TB_ALIGN_NONE },
- { X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256m, TB_ALIGN_NONE },
- { X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256m, TB_ALIGN_NONE },
- { X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256m, TB_ALIGN_NONE },
- { X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256m, TB_ALIGN_NONE },
- { X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256m, TB_ALIGN_NONE },
-
- { X86::VFNMADD231SSr, X86::VFNMADD231SSm, TB_ALIGN_NONE },
- { X86::VFNMADD231SSr_Int, X86::VFNMADD231SSm_Int, TB_ALIGN_NONE },
- { X86::VFNMADD231SDr, X86::VFNMADD231SDm, TB_ALIGN_NONE },
- { X86::VFNMADD231SDr_Int, X86::VFNMADD231SDm_Int, TB_ALIGN_NONE },
- { X86::VFNMADD132SSr, X86::VFNMADD132SSm, TB_ALIGN_NONE },
- { X86::VFNMADD132SSr_Int, X86::VFNMADD132SSm_Int, TB_ALIGN_NONE },
- { X86::VFNMADD132SDr, X86::VFNMADD132SDm, TB_ALIGN_NONE },
- { X86::VFNMADD132SDr_Int, X86::VFNMADD132SDm_Int, TB_ALIGN_NONE },
- { X86::VFNMADD213SSr, X86::VFNMADD213SSm, TB_ALIGN_NONE },
- { X86::VFNMADD213SSr_Int, X86::VFNMADD213SSm_Int, TB_ALIGN_NONE },
- { X86::VFNMADD213SDr, X86::VFNMADD213SDm, TB_ALIGN_NONE },
- { X86::VFNMADD213SDr_Int, X86::VFNMADD213SDm_Int, TB_ALIGN_NONE },
- { X86::VFNMADD231SSZr, X86::VFNMADD231SSZm, TB_ALIGN_NONE },
- { X86::VFNMADD231SSZr_Int, X86::VFNMADD231SSZm_Int, TB_ALIGN_NONE },
- { X86::VFNMADD231SDZr, X86::VFNMADD231SDZm, TB_ALIGN_NONE },
- { X86::VFNMADD231SDZr_Int, X86::VFNMADD231SDZm_Int, TB_ALIGN_NONE },
- { X86::VFNMADD132SSZr, X86::VFNMADD132SSZm, TB_ALIGN_NONE },
- { X86::VFNMADD132SSZr_Int, X86::VFNMADD132SSZm_Int, TB_ALIGN_NONE },
- { X86::VFNMADD132SDZr, X86::VFNMADD132SDZm, TB_ALIGN_NONE },
- { X86::VFNMADD132SDZr_Int, X86::VFNMADD132SDZm_Int, TB_ALIGN_NONE },
- { X86::VFNMADD213SSZr, X86::VFNMADD213SSZm, TB_ALIGN_NONE },
- { X86::VFNMADD213SSZr_Int, X86::VFNMADD213SSZm_Int, TB_ALIGN_NONE },
- { X86::VFNMADD213SDZr, X86::VFNMADD213SDZm, TB_ALIGN_NONE },
- { X86::VFNMADD213SDZr_Int, X86::VFNMADD213SDZm_Int, TB_ALIGN_NONE },
-
- { X86::VFNMADD231PSr, X86::VFNMADD231PSm, TB_ALIGN_NONE },
- { X86::VFNMADD231PDr, X86::VFNMADD231PDm, TB_ALIGN_NONE },
- { X86::VFNMADD132PSr, X86::VFNMADD132PSm, TB_ALIGN_NONE },
- { X86::VFNMADD132PDr, X86::VFNMADD132PDm, TB_ALIGN_NONE },
- { X86::VFNMADD213PSr, X86::VFNMADD213PSm, TB_ALIGN_NONE },
- { X86::VFNMADD213PDr, X86::VFNMADD213PDm, TB_ALIGN_NONE },
- { X86::VFNMADD231PSYr, X86::VFNMADD231PSYm, TB_ALIGN_NONE },
- { X86::VFNMADD231PDYr, X86::VFNMADD231PDYm, TB_ALIGN_NONE },
- { X86::VFNMADD132PSYr, X86::VFNMADD132PSYm, TB_ALIGN_NONE },
- { X86::VFNMADD132PDYr, X86::VFNMADD132PDYm, TB_ALIGN_NONE },
- { X86::VFNMADD213PSYr, X86::VFNMADD213PSYm, TB_ALIGN_NONE },
- { X86::VFNMADD213PDYr, X86::VFNMADD213PDYm, TB_ALIGN_NONE },
- { X86::VFNMADD231PSZr, X86::VFNMADD231PSZm, TB_ALIGN_NONE },
- { X86::VFNMADD231PDZr, X86::VFNMADD231PDZm, TB_ALIGN_NONE },
- { X86::VFNMADD132PSZr, X86::VFNMADD132PSZm, TB_ALIGN_NONE },
- { X86::VFNMADD132PDZr, X86::VFNMADD132PDZm, TB_ALIGN_NONE },
- { X86::VFNMADD213PSZr, X86::VFNMADD213PSZm, TB_ALIGN_NONE },
- { X86::VFNMADD213PDZr, X86::VFNMADD213PDZm, TB_ALIGN_NONE },
- { X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128m, TB_ALIGN_NONE },
- { X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128m, TB_ALIGN_NONE },
- { X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128m, TB_ALIGN_NONE },
- { X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128m, TB_ALIGN_NONE },
- { X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128m, TB_ALIGN_NONE },
- { X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128m, TB_ALIGN_NONE },
- { X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256m, TB_ALIGN_NONE },
- { X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256m, TB_ALIGN_NONE },
- { X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256m, TB_ALIGN_NONE },
- { X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256m, TB_ALIGN_NONE },
- { X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256m, TB_ALIGN_NONE },
- { X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256m, TB_ALIGN_NONE },
-
- { X86::VFMSUB231SSr, X86::VFMSUB231SSm, TB_ALIGN_NONE },
- { X86::VFMSUB231SSr_Int, X86::VFMSUB231SSm_Int, TB_ALIGN_NONE },
- { X86::VFMSUB231SDr, X86::VFMSUB231SDm, TB_ALIGN_NONE },
- { X86::VFMSUB231SDr_Int, X86::VFMSUB231SDm_Int, TB_ALIGN_NONE },
- { X86::VFMSUB132SSr, X86::VFMSUB132SSm, TB_ALIGN_NONE },
- { X86::VFMSUB132SSr_Int, X86::VFMSUB132SSm_Int, TB_ALIGN_NONE },
- { X86::VFMSUB132SDr, X86::VFMSUB132SDm, TB_ALIGN_NONE },
- { X86::VFMSUB132SDr_Int, X86::VFMSUB132SDm_Int, TB_ALIGN_NONE },
- { X86::VFMSUB213SSr, X86::VFMSUB213SSm, TB_ALIGN_NONE },
- { X86::VFMSUB213SSr_Int, X86::VFMSUB213SSm_Int, TB_ALIGN_NONE },
- { X86::VFMSUB213SDr, X86::VFMSUB213SDm, TB_ALIGN_NONE },
- { X86::VFMSUB213SDr_Int, X86::VFMSUB213SDm_Int, TB_ALIGN_NONE },
- { X86::VFMSUB231SSZr, X86::VFMSUB231SSZm, TB_ALIGN_NONE },
- { X86::VFMSUB231SSZr_Int, X86::VFMSUB231SSZm_Int, TB_ALIGN_NONE },
- { X86::VFMSUB231SDZr, X86::VFMSUB231SDZm, TB_ALIGN_NONE },
- { X86::VFMSUB231SDZr_Int, X86::VFMSUB231SDZm_Int, TB_ALIGN_NONE },
- { X86::VFMSUB132SSZr, X86::VFMSUB132SSZm, TB_ALIGN_NONE },
- { X86::VFMSUB132SSZr_Int, X86::VFMSUB132SSZm_Int, TB_ALIGN_NONE },
- { X86::VFMSUB132SDZr, X86::VFMSUB132SDZm, TB_ALIGN_NONE },
- { X86::VFMSUB132SDZr_Int, X86::VFMSUB132SDZm_Int, TB_ALIGN_NONE },
- { X86::VFMSUB213SSZr, X86::VFMSUB213SSZm, TB_ALIGN_NONE },
- { X86::VFMSUB213SSZr_Int, X86::VFMSUB213SSZm_Int, TB_ALIGN_NONE },
- { X86::VFMSUB213SDZr, X86::VFMSUB213SDZm, TB_ALIGN_NONE },
- { X86::VFMSUB213SDZr_Int, X86::VFMSUB213SDZm_Int, TB_ALIGN_NONE },
-
- { X86::VFMSUB231PSr, X86::VFMSUB231PSm, TB_ALIGN_NONE },
- { X86::VFMSUB231PDr, X86::VFMSUB231PDm, TB_ALIGN_NONE },
- { X86::VFMSUB132PSr, X86::VFMSUB132PSm, TB_ALIGN_NONE },
- { X86::VFMSUB132PDr, X86::VFMSUB132PDm, TB_ALIGN_NONE },
- { X86::VFMSUB213PSr, X86::VFMSUB213PSm, TB_ALIGN_NONE },
- { X86::VFMSUB213PDr, X86::VFMSUB213PDm, TB_ALIGN_NONE },
- { X86::VFMSUB231PSYr, X86::VFMSUB231PSYm, TB_ALIGN_NONE },
- { X86::VFMSUB231PDYr, X86::VFMSUB231PDYm, TB_ALIGN_NONE },
- { X86::VFMSUB132PSYr, X86::VFMSUB132PSYm, TB_ALIGN_NONE },
- { X86::VFMSUB132PDYr, X86::VFMSUB132PDYm, TB_ALIGN_NONE },
- { X86::VFMSUB213PSYr, X86::VFMSUB213PSYm, TB_ALIGN_NONE },
- { X86::VFMSUB213PDYr, X86::VFMSUB213PDYm, TB_ALIGN_NONE },
- { X86::VFMSUB231PSZr, X86::VFMSUB231PSZm, TB_ALIGN_NONE },
- { X86::VFMSUB231PDZr, X86::VFMSUB231PDZm, TB_ALIGN_NONE },
- { X86::VFMSUB132PSZr, X86::VFMSUB132PSZm, TB_ALIGN_NONE },
- { X86::VFMSUB132PDZr, X86::VFMSUB132PDZm, TB_ALIGN_NONE },
- { X86::VFMSUB213PSZr, X86::VFMSUB213PSZm, TB_ALIGN_NONE },
- { X86::VFMSUB213PDZr, X86::VFMSUB213PDZm, TB_ALIGN_NONE },
- { X86::VFMSUB231PSZ128r, X86::VFMSUB231PSZ128m, TB_ALIGN_NONE },
- { X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128m, TB_ALIGN_NONE },
- { X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128m, TB_ALIGN_NONE },
- { X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128m, TB_ALIGN_NONE },
- { X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128m, TB_ALIGN_NONE },
- { X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128m, TB_ALIGN_NONE },
- { X86::VFMSUB231PSZ256r, X86::VFMSUB231PSZ256m, TB_ALIGN_NONE },
- { X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256m, TB_ALIGN_NONE },
- { X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256m, TB_ALIGN_NONE },
- { X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256m, TB_ALIGN_NONE },
- { X86::VFMSUB213PSZ256r, X86::VFMSUB213PSZ256m, TB_ALIGN_NONE },
- { X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256m, TB_ALIGN_NONE },
-
- { X86::VFNMSUB231SSr, X86::VFNMSUB231SSm, TB_ALIGN_NONE },
- { X86::VFNMSUB231SSr_Int, X86::VFNMSUB231SSm_Int, TB_ALIGN_NONE },
- { X86::VFNMSUB231SDr, X86::VFNMSUB231SDm, TB_ALIGN_NONE },
- { X86::VFNMSUB231SDr_Int, X86::VFNMSUB231SDm_Int, TB_ALIGN_NONE },
- { X86::VFNMSUB132SSr, X86::VFNMSUB132SSm, TB_ALIGN_NONE },
- { X86::VFNMSUB132SSr_Int, X86::VFNMSUB132SSm_Int, TB_ALIGN_NONE },
- { X86::VFNMSUB132SDr, X86::VFNMSUB132SDm, TB_ALIGN_NONE },
- { X86::VFNMSUB132SDr_Int, X86::VFNMSUB132SDm_Int, TB_ALIGN_NONE },
- { X86::VFNMSUB213SSr, X86::VFNMSUB213SSm, TB_ALIGN_NONE },
- { X86::VFNMSUB213SSr_Int, X86::VFNMSUB213SSm_Int, TB_ALIGN_NONE },
- { X86::VFNMSUB213SDr, X86::VFNMSUB213SDm, TB_ALIGN_NONE },
- { X86::VFNMSUB213SDr_Int, X86::VFNMSUB213SDm_Int, TB_ALIGN_NONE },
-
- { X86::VFNMSUB231PSr, X86::VFNMSUB231PSm, TB_ALIGN_NONE },
- { X86::VFNMSUB231PDr, X86::VFNMSUB231PDm, TB_ALIGN_NONE },
- { X86::VFNMSUB132PSr, X86::VFNMSUB132PSm, TB_ALIGN_NONE },
- { X86::VFNMSUB132PDr, X86::VFNMSUB132PDm, TB_ALIGN_NONE },
- { X86::VFNMSUB213PSr, X86::VFNMSUB213PSm, TB_ALIGN_NONE },
- { X86::VFNMSUB213PDr, X86::VFNMSUB213PDm, TB_ALIGN_NONE },
- { X86::VFNMSUB231PSYr, X86::VFNMSUB231PSYm, TB_ALIGN_NONE },
- { X86::VFNMSUB231PDYr, X86::VFNMSUB231PDYm, TB_ALIGN_NONE },
- { X86::VFNMSUB132PSYr, X86::VFNMSUB132PSYm, TB_ALIGN_NONE },
- { X86::VFNMSUB132PDYr, X86::VFNMSUB132PDYm, TB_ALIGN_NONE },
- { X86::VFNMSUB213PSYr, X86::VFNMSUB213PSYm, TB_ALIGN_NONE },
- { X86::VFNMSUB213PDYr, X86::VFNMSUB213PDYm, TB_ALIGN_NONE },
- { X86::VFNMSUB231PSZr, X86::VFNMSUB231PSZm, TB_ALIGN_NONE },
- { X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZm, TB_ALIGN_NONE },
- { X86::VFNMSUB132PSZr, X86::VFNMSUB132PSZm, TB_ALIGN_NONE },
- { X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZm, TB_ALIGN_NONE },
- { X86::VFNMSUB213PSZr, X86::VFNMSUB213PSZm, TB_ALIGN_NONE },
- { X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZm, TB_ALIGN_NONE },
- { X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128m, TB_ALIGN_NONE },
- { X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128m, TB_ALIGN_NONE },
- { X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128m, TB_ALIGN_NONE },
- { X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128m, TB_ALIGN_NONE },
- { X86::VFNMSUB213PSZ128r, X86::VFNMSUB213PSZ128m, TB_ALIGN_NONE },
- { X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128m, TB_ALIGN_NONE },
- { X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256m, TB_ALIGN_NONE },
- { X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256m, TB_ALIGN_NONE },
- { X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256m, TB_ALIGN_NONE },
- { X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256m, TB_ALIGN_NONE },
- { X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256m, TB_ALIGN_NONE },
- { X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256m, TB_ALIGN_NONE },
-
- { X86::VFMADDSUB231PSr, X86::VFMADDSUB231PSm, TB_ALIGN_NONE },
- { X86::VFMADDSUB231PDr, X86::VFMADDSUB231PDm, TB_ALIGN_NONE },
- { X86::VFMADDSUB132PSr, X86::VFMADDSUB132PSm, TB_ALIGN_NONE },
- { X86::VFMADDSUB132PDr, X86::VFMADDSUB132PDm, TB_ALIGN_NONE },
- { X86::VFMADDSUB213PSr, X86::VFMADDSUB213PSm, TB_ALIGN_NONE },
- { X86::VFMADDSUB213PDr, X86::VFMADDSUB213PDm, TB_ALIGN_NONE },
- { X86::VFMADDSUB231PSYr, X86::VFMADDSUB231PSYm, TB_ALIGN_NONE },
- { X86::VFMADDSUB231PDYr, X86::VFMADDSUB231PDYm, TB_ALIGN_NONE },
- { X86::VFMADDSUB132PSYr, X86::VFMADDSUB132PSYm, TB_ALIGN_NONE },
- { X86::VFMADDSUB132PDYr, X86::VFMADDSUB132PDYm, TB_ALIGN_NONE },
- { X86::VFMADDSUB213PSYr, X86::VFMADDSUB213PSYm, TB_ALIGN_NONE },
- { X86::VFMADDSUB213PDYr, X86::VFMADDSUB213PDYm, TB_ALIGN_NONE },
- { X86::VFMADDSUB231PSZr, X86::VFMADDSUB231PSZm, TB_ALIGN_NONE },
- { X86::VFMADDSUB231PDZr, X86::VFMADDSUB231PDZm, TB_ALIGN_NONE },
- { X86::VFMADDSUB132PSZr, X86::VFMADDSUB132PSZm, TB_ALIGN_NONE },
- { X86::VFMADDSUB132PDZr, X86::VFMADDSUB132PDZm, TB_ALIGN_NONE },
- { X86::VFMADDSUB213PSZr, X86::VFMADDSUB213PSZm, TB_ALIGN_NONE },
- { X86::VFMADDSUB213PDZr, X86::VFMADDSUB213PDZm, TB_ALIGN_NONE },
- { X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128m, TB_ALIGN_NONE },
- { X86::VFMADDSUB231PDZ128r, X86::VFMADDSUB231PDZ128m, TB_ALIGN_NONE },
- { X86::VFMADDSUB132PSZ128r, X86::VFMADDSUB132PSZ128m, TB_ALIGN_NONE },
- { X86::VFMADDSUB132PDZ128r, X86::VFMADDSUB132PDZ128m, TB_ALIGN_NONE },
- { X86::VFMADDSUB213PSZ128r, X86::VFMADDSUB213PSZ128m, TB_ALIGN_NONE },
- { X86::VFMADDSUB213PDZ128r, X86::VFMADDSUB213PDZ128m, TB_ALIGN_NONE },
- { X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256m, TB_ALIGN_NONE },
- { X86::VFMADDSUB231PDZ256r, X86::VFMADDSUB231PDZ256m, TB_ALIGN_NONE },
- { X86::VFMADDSUB132PSZ256r, X86::VFMADDSUB132PSZ256m, TB_ALIGN_NONE },
- { X86::VFMADDSUB132PDZ256r, X86::VFMADDSUB132PDZ256m, TB_ALIGN_NONE },
- { X86::VFMADDSUB213PSZ256r, X86::VFMADDSUB213PSZ256m, TB_ALIGN_NONE },
- { X86::VFMADDSUB213PDZ256r, X86::VFMADDSUB213PDZ256m, TB_ALIGN_NONE },
-
- { X86::VFMSUBADD231PSr, X86::VFMSUBADD231PSm, TB_ALIGN_NONE },
- { X86::VFMSUBADD231PDr, X86::VFMSUBADD231PDm, TB_ALIGN_NONE },
- { X86::VFMSUBADD132PSr, X86::VFMSUBADD132PSm, TB_ALIGN_NONE },
- { X86::VFMSUBADD132PDr, X86::VFMSUBADD132PDm, TB_ALIGN_NONE },
- { X86::VFMSUBADD213PSr, X86::VFMSUBADD213PSm, TB_ALIGN_NONE },
- { X86::VFMSUBADD213PDr, X86::VFMSUBADD213PDm, TB_ALIGN_NONE },
- { X86::VFMSUBADD231PSYr, X86::VFMSUBADD231PSYm, TB_ALIGN_NONE },
- { X86::VFMSUBADD231PDYr, X86::VFMSUBADD231PDYm, TB_ALIGN_NONE },
- { X86::VFMSUBADD132PSYr, X86::VFMSUBADD132PSYm, TB_ALIGN_NONE },
- { X86::VFMSUBADD132PDYr, X86::VFMSUBADD132PDYm, TB_ALIGN_NONE },
- { X86::VFMSUBADD213PSYr, X86::VFMSUBADD213PSYm, TB_ALIGN_NONE },
- { X86::VFMSUBADD213PDYr, X86::VFMSUBADD213PDYm, TB_ALIGN_NONE },
- { X86::VFMSUBADD231PSZr, X86::VFMSUBADD231PSZm, TB_ALIGN_NONE },
- { X86::VFMSUBADD231PDZr, X86::VFMSUBADD231PDZm, TB_ALIGN_NONE },
- { X86::VFMSUBADD132PSZr, X86::VFMSUBADD132PSZm, TB_ALIGN_NONE },
- { X86::VFMSUBADD132PDZr, X86::VFMSUBADD132PDZm, TB_ALIGN_NONE },
- { X86::VFMSUBADD213PSZr, X86::VFMSUBADD213PSZm, TB_ALIGN_NONE },
- { X86::VFMSUBADD213PDZr, X86::VFMSUBADD213PDZm, TB_ALIGN_NONE },
- { X86::VFMSUBADD231PSZ128r, X86::VFMSUBADD231PSZ128m, TB_ALIGN_NONE },
- { X86::VFMSUBADD231PDZ128r, X86::VFMSUBADD231PDZ128m, TB_ALIGN_NONE },
- { X86::VFMSUBADD132PSZ128r, X86::VFMSUBADD132PSZ128m, TB_ALIGN_NONE },
- { X86::VFMSUBADD132PDZ128r, X86::VFMSUBADD132PDZ128m, TB_ALIGN_NONE },
- { X86::VFMSUBADD213PSZ128r, X86::VFMSUBADD213PSZ128m, TB_ALIGN_NONE },
- { X86::VFMSUBADD213PDZ128r, X86::VFMSUBADD213PDZ128m, TB_ALIGN_NONE },
- { X86::VFMSUBADD231PSZ256r, X86::VFMSUBADD231PSZ256m, TB_ALIGN_NONE },
- { X86::VFMSUBADD231PDZ256r, X86::VFMSUBADD231PDZ256m, TB_ALIGN_NONE },
- { X86::VFMSUBADD132PSZ256r, X86::VFMSUBADD132PSZ256m, TB_ALIGN_NONE },
- { X86::VFMSUBADD132PDZ256r, X86::VFMSUBADD132PDZ256m, TB_ALIGN_NONE },
- { X86::VFMSUBADD213PSZ256r, X86::VFMSUBADD213PSZ256m, TB_ALIGN_NONE },
- { X86::VFMSUBADD213PDZ256r, X86::VFMSUBADD213PDZ256m, TB_ALIGN_NONE },
-
// FMA4 foldable patterns
{ X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_NONE },
{ X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_NONE },
@@ -2234,6 +1959,13 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// Index 3, folded load
Entry.Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
}
+ auto I = X86InstrFMA3Info::rm_begin();
+ auto E = X86InstrFMA3Info::rm_end();
+ for (; I != E; ++I)
+ if (!I.getGroup()->isKMasked())
+ AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
+ I.getRegOpcode(), I.getMemOpcode(),
+ TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD);
static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
// AVX-512 foldable instructions
@@ -2283,6 +2015,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// Index 4, folded load
Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD);
}
+ for (I = X86InstrFMA3Info::rm_begin(); I != E; ++I)
+ if (I.getGroup()->isKMasked())
+ AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
+ I.getRegOpcode(), I.getMemOpcode(),
+ TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD);
}
void
@@ -3345,241 +3082,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return NewMI;
}
-/// Returns true if the given instruction opcode is FMA3.
-/// Otherwise, returns false.
-/// The second parameter is optional and is used as the second return from
-/// the function. It is set to true if the given instruction has FMA3 opcode
-/// that is used for lowering of scalar FMA intrinsics, and it is set to false
-/// otherwise.
-static bool isFMA3(unsigned Opcode, bool &IsIntrinsic) {
- IsIntrinsic = false;
-
-#define FMA3_CASE(Name, Modifier) \
-case X86::Name##r##Modifier: case X86::Name##m##Modifier:
-
-#define FMA3_SCALAR_PAIR(Name, Size, Modifier) \
- FMA3_CASE(Name##SD##Size, Modifier) \
- FMA3_CASE(Name##SS##Size, Modifier)
-
-#define FMA3_PACKED_PAIR(Name, Size) \
- FMA3_CASE(Name##PD##Size, ) \
- FMA3_CASE(Name##PS##Size, )
-
-#define FMA3_PACKED_SET(Form, Size) \
- FMA3_PACKED_PAIR(VFMADD##Form, Size) \
- FMA3_PACKED_PAIR(VFMSUB##Form, Size) \
- FMA3_PACKED_PAIR(VFNMADD##Form, Size) \
- FMA3_PACKED_PAIR(VFNMSUB##Form, Size) \
- FMA3_PACKED_PAIR(VFMADDSUB##Form, Size) \
- FMA3_PACKED_PAIR(VFMSUBADD##Form, Size)
-
-#define FMA3_CASES(Form) \
- FMA3_SCALAR_PAIR(VFMADD##Form, ,) \
- FMA3_SCALAR_PAIR(VFMSUB##Form, ,) \
- FMA3_SCALAR_PAIR(VFNMADD##Form, ,) \
- FMA3_SCALAR_PAIR(VFNMSUB##Form, ,) \
- FMA3_PACKED_SET(Form, ) \
- FMA3_PACKED_SET(Form, Y) \
-
-#define FMA3_CASES_AVX512(Form) \
- FMA3_SCALAR_PAIR(VFMADD##Form, Z, ) \
- FMA3_SCALAR_PAIR(VFMSUB##Form, Z, ) \
- FMA3_SCALAR_PAIR(VFNMADD##Form, Z, ) \
- FMA3_SCALAR_PAIR(VFNMSUB##Form, Z, ) \
- FMA3_PACKED_SET(Form, Z128) \
- FMA3_PACKED_SET(Form, Z256) \
- FMA3_PACKED_SET(Form, Z)
-
-#define FMA3_CASES_SCALAR_INT(Form) \
- FMA3_SCALAR_PAIR(VFMADD##Form, , _Int) \
- FMA3_SCALAR_PAIR(VFMSUB##Form, , _Int) \
- FMA3_SCALAR_PAIR(VFNMADD##Form, , _Int) \
- FMA3_SCALAR_PAIR(VFNMSUB##Form, , _Int)
-
-#define FMA3_CASES_SCALAR_INT_AVX512(Form) \
- FMA3_SCALAR_PAIR(VFMADD##Form, Z, _Int) \
- FMA3_SCALAR_PAIR(VFMSUB##Form, Z, _Int) \
- FMA3_SCALAR_PAIR(VFNMADD##Form, Z, _Int) \
- FMA3_SCALAR_PAIR(VFNMSUB##Form, Z, _Int)
-
- switch (Opcode) {
- FMA3_CASES(132)
- FMA3_CASES(213)
- FMA3_CASES(231)
-
- // AVX-512 instructions
- FMA3_CASES_AVX512(132)
- FMA3_CASES_AVX512(213)
- FMA3_CASES_AVX512(231)
- return true;
-
- FMA3_CASES_SCALAR_INT(132)
- FMA3_CASES_SCALAR_INT(213)
- FMA3_CASES_SCALAR_INT(231)
-
- // AVX-512 instructions
- FMA3_CASES_SCALAR_INT_AVX512(132)
- FMA3_CASES_SCALAR_INT_AVX512(213)
- FMA3_CASES_SCALAR_INT_AVX512(231)
- IsIntrinsic = true;
- return true;
- default:
- return false;
- }
- llvm_unreachable("Opcode not handled by the switch");
-
-#undef FMA3_CASE
-#undef FMA3_SCALAR_PAIR
-#undef FMA3_PACKED_PAIR
-#undef FMA3_PACKED_SET
-#undef FMA3_CASES
-#undef FMA3_CASES_AVX512
-#undef FMA3_CASES_SCALAR_INT
-#undef FMA3_CASES_SCALAR_INT_AVX512
-}
-
-/// Returns an adjusted FMA opcode that must be used in FMA instruction that
-/// performs the same computations as the given MI but which has the operands
-/// \p SrcOpIdx1 and \p SrcOpIdx2 commuted.
-/// It may return 0 if it is unsafe to commute the operands.
-///
-/// The returned FMA opcode may differ from the opcode in the given \p MI.
-/// For example, commuting the operands #1 and #3 in the following FMA
-/// FMA213 #1, #2, #3
-/// results into instruction with adjusted opcode:
-/// FMA231 #3, #2, #1
-static unsigned getFMA3OpcodeToCommuteOperands(unsigned Opc,
- bool IsIntrinOpcode,
- unsigned SrcOpIdx1,
- unsigned SrcOpIdx2) {
-#define FMA3_ENTRY(Name, Suffix) \
- { X86::Name##132##Suffix, X86::Name##213##Suffix, X86::Name##231##Suffix },
-
-#define FMA3_SCALAR_PAIR(Name, Suffix) \
- FMA3_ENTRY(Name, SS##Suffix) \
- FMA3_ENTRY(Name, SD##Suffix)
-
-#define FMA3_PACKED_PAIR(Name, Suffix) \
- FMA3_ENTRY(Name, PS##Suffix) \
- FMA3_ENTRY(Name, PD##Suffix)
-
-#define FMA3_PACKED_SIZES(Name, Suffix) \
- FMA3_PACKED_PAIR(Name, Suffix) \
- FMA3_PACKED_PAIR(Name, Y##Suffix)
-
-#define FMA3_TABLE_ALL(Name) \
- FMA3_SCALAR_PAIR(Name, r) \
- FMA3_PACKED_SIZES(Name, r) \
- FMA3_SCALAR_PAIR(Name, m) \
- FMA3_PACKED_SIZES(Name, m)
-
-#define FMA3_TABLE_PACKED(Name) \
- FMA3_PACKED_SIZES(Name, r) \
- FMA3_PACKED_SIZES(Name, m)
-
-#define FMA3_TABLE_SCALAR_INT(Name) \
- FMA3_SCALAR_PAIR(Name, r_Int) \
- FMA3_SCALAR_PAIR(Name, m_Int)
-
-#define FMA3_PACKED_SIZES_AVX512(Name, Suffix) \
- FMA3_PACKED_PAIR(Name, Z128##Suffix) \
- FMA3_PACKED_PAIR(Name, Z256##Suffix) \
- FMA3_PACKED_PAIR(Name, Z##Suffix)
-
-#define FMA3_TABLE_ALL_AVX512(Name) \
- FMA3_SCALAR_PAIR(Name, Zr) \
- FMA3_PACKED_SIZES_AVX512(Name, r) \
- FMA3_SCALAR_PAIR(Name, Zm) \
- FMA3_PACKED_SIZES_AVX512(Name, m)
-
-#define FMA3_TABLE_PACKED_AVX512(Name) \
- FMA3_PACKED_SIZES_AVX512(Name, r) \
- FMA3_PACKED_SIZES_AVX512(Name, m)
-
-#define FMA3_TABLE_SCALAR_INT_AVX512(Name) \
- FMA3_SCALAR_PAIR(Name, Zr_Int) \
- FMA3_SCALAR_PAIR(Name, Zm_Int)
-
- // Define the array that holds FMA opcodes in groups
- // of 3 opcodes(132, 213, 231) in each group.
- static const uint16_t RegularOpcodeGroups[][3] = {
- FMA3_TABLE_ALL(VFMADD)
- FMA3_TABLE_ALL(VFMSUB)
- FMA3_TABLE_ALL(VFNMADD)
- FMA3_TABLE_ALL(VFNMSUB)
- FMA3_TABLE_PACKED(VFMADDSUB)
- FMA3_TABLE_PACKED(VFMSUBADD)
-
- // AVX-512 instructions
- FMA3_TABLE_ALL_AVX512(VFMADD)
- FMA3_TABLE_ALL_AVX512(VFMSUB)
- FMA3_TABLE_ALL_AVX512(VFNMADD)
- FMA3_TABLE_ALL_AVX512(VFNMSUB)
- FMA3_TABLE_PACKED_AVX512(VFMADDSUB)
- FMA3_TABLE_PACKED_AVX512(VFMSUBADD)
- };
-
- // Define the array that holds FMA*_Int opcodes in groups
- // of 3 opcodes(132, 213, 231) in each group.
- static const uint16_t IntrinOpcodeGroups[][3] = {
- FMA3_TABLE_SCALAR_INT(VFMADD)
- FMA3_TABLE_SCALAR_INT(VFMSUB)
- FMA3_TABLE_SCALAR_INT(VFNMADD)
- FMA3_TABLE_SCALAR_INT(VFNMSUB)
-
- // AVX-512 instructions
- FMA3_TABLE_SCALAR_INT_AVX512(VFMADD)
- FMA3_TABLE_SCALAR_INT_AVX512(VFMSUB)
- FMA3_TABLE_SCALAR_INT_AVX512(VFNMADD)
- FMA3_TABLE_SCALAR_INT_AVX512(VFNMSUB)
- };
-
-#undef FMA3_ENTRY
-#undef FMA3_SCALAR_PAIR
-#undef FMA3_PACKED_PAIR
-#undef FMA3_PACKED_SIZES
-#undef FMA3_TABLE_ALL
-#undef FMA3_TABLE_PACKED
-#undef FMA3_TABLE_SCALAR_INT
-#undef FMA3_SCALAR_PAIR_AVX512
-#undef FMA3_PACKED_SIZES_AVX512
-#undef FMA3_TABLE_ALL_AVX512
-#undef FMA3_TABLE_PACKED_AVX512
-#undef FMA3_TABLE_SCALAR_INT_AVX512
-
- const unsigned Form132Index = 0;
- const unsigned Form213Index = 1;
- const unsigned Form231Index = 2;
- const unsigned FormsNum = 3;
-
- size_t GroupsNum;
- const uint16_t (*OpcodeGroups)[3];
- if (IsIntrinOpcode) {
- GroupsNum = array_lengthof(IntrinOpcodeGroups);
- OpcodeGroups = IntrinOpcodeGroups;
- } else {
- GroupsNum = array_lengthof(RegularOpcodeGroups);
- OpcodeGroups = RegularOpcodeGroups;
- }
-
- const uint16_t *FoundOpcodesGroup = nullptr;
- size_t FormIndex;
-
- // Look for the input opcode in the corresponding opcodes table.
- for (size_t GroupIndex = 0; GroupIndex < GroupsNum && !FoundOpcodesGroup;
- ++GroupIndex) {
- for (FormIndex = 0; FormIndex < FormsNum; ++FormIndex) {
- if (OpcodeGroups[GroupIndex][FormIndex] == Opc) {
- FoundOpcodesGroup = OpcodeGroups[GroupIndex];
- break;
- }
- }
- }
+unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
+ const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2,
+ const X86InstrFMA3Group &FMA3Group) const {
- // The input opcode does not match with any of the opcodes from the tables.
- // The unsupported FMA opcode must be added to one of the two opcode groups
- // defined above.
- assert(FoundOpcodesGroup != nullptr && "Unexpected FMA3 opcode");
+ unsigned Opc = MI.getOpcode();
// Put the lowest index to SrcOpIdx1 to simplify the checks below.
if (SrcOpIdx1 > SrcOpIdx2)
@@ -3591,15 +3098,40 @@ static unsigned getFMA3OpcodeToCommuteOperands(unsigned Opc,
// not implemented yet. So, just return 0 in that case.
// When such analysis are available this place will be the right place for
// calling it.
- if (IsIntrinOpcode && SrcOpIdx1 == 1)
+ if (FMA3Group.isIntrinsic() && SrcOpIdx1 == 1)
return 0;
+ unsigned FMAOp1 = 1, FMAOp2 = 2, FMAOp3 = 3;
+ if (FMA3Group.isKMasked()) {
+ // The k-mask operand cannot be commuted.
+ if (SrcOpIdx1 == 2)
+ return 0;
+
+ // For k-zero-masked operations it is Ok to commute the first vector
+ // operand.
+ // For regular k-masked operations a conservative choice is done as the
+ // elements of the first vector operand, for which the corresponding bit
+ // in the k-mask operand is set to 0, are copied to the result of FMA.
+ // TODO/FIXME: The commute still may be legal if it is known that the
+ // k-mask operand is set to either all ones or all zeroes.
+ // It is also Ok to commute the 1st operand if all users of MI use only
+ // the elements enabled by the k-mask operand. For example,
+ // v4 = VFMADD213PSZrk v1, k, v2, v3; // v1[i] = k[i] ? v2[i]*v1[i]+v3[i]
+ // : v1[i];
+ // VMOVAPSZmrk <mem_addr>, k, v4; // this is the ONLY user of v4 ->
+ // // Ok, to commute v1 in FMADD213PSZrk.
+ if (FMA3Group.isKMergeMasked() && SrcOpIdx1 == FMAOp1)
+ return 0;
+ FMAOp2++;
+ FMAOp3++;
+ }
+
unsigned Case;
- if (SrcOpIdx1 == 1 && SrcOpIdx2 == 2)
+ if (SrcOpIdx1 == FMAOp1 && SrcOpIdx2 == FMAOp2)
Case = 0;
- else if (SrcOpIdx1 == 1 && SrcOpIdx2 == 3)
+ else if (SrcOpIdx1 == FMAOp1 && SrcOpIdx2 == FMAOp3)
Case = 1;
- else if (SrcOpIdx1 == 2 && SrcOpIdx2 == 3)
+ else if (SrcOpIdx1 == FMAOp2 && SrcOpIdx2 == FMAOp3)
Case = 2;
else
return 0;
@@ -3607,6 +3139,9 @@ static unsigned getFMA3OpcodeToCommuteOperands(unsigned Opc,
// Define the FMA forms mapping array that helps to map input FMA form
// to output FMA form to preserve the operation semantics after
// commuting the operands.
+ const unsigned Form132Index = 0;
+ const unsigned Form213Index = 1;
+ const unsigned Form231Index = 2;
static const unsigned FormMapping[][3] = {
// 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2;
// FMA132 A, C, b; ==> FMA231 C, A, b;
@@ -3625,9 +3160,24 @@ static unsigned getFMA3OpcodeToCommuteOperands(unsigned Opc,
{ Form213Index, Form132Index, Form231Index }
};
+ unsigned FMAForms[3];
+ if (FMA3Group.isRegOpcodeFromGroup(Opc)) {
+ FMAForms[0] = FMA3Group.getReg132Opcode();
+ FMAForms[1] = FMA3Group.getReg213Opcode();
+ FMAForms[2] = FMA3Group.getReg231Opcode();
+ } else {
+ FMAForms[0] = FMA3Group.getMem132Opcode();
+ FMAForms[1] = FMA3Group.getMem213Opcode();
+ FMAForms[2] = FMA3Group.getMem231Opcode();
+ }
+ unsigned FormIndex;
+ for (FormIndex = 0; FormIndex < 3; FormIndex++)
+ if (Opc == FMAForms[FormIndex])
+ break;
+
// Everything is ready, just adjust the FMA opcode and return it.
FormIndex = FormMapping[Case][FormIndex];
- return FoundOpcodesGroup[FormIndex];
+ return FMAForms[FormIndex];
}
MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
@@ -3852,11 +3402,11 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
OpIdx1, OpIdx2);
}
default:
- bool IsIntrinOpcode;
- if (isFMA3(MI.getOpcode(), IsIntrinOpcode)) {
- unsigned Opc = getFMA3OpcodeToCommuteOperands(MI.getOpcode(),
- IsIntrinOpcode,
- OpIdx1, OpIdx2);
+ const X86InstrFMA3Group *FMA3Group =
+ X86InstrFMA3Info::getFMA3Group(MI.getOpcode());
+ if (FMA3Group) {
+ unsigned Opc =
+ getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group);
if (Opc == 0)
return nullptr;
auto &WorkingMI = cloneIfNew(MI);
@@ -3869,21 +3419,37 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
}
}
-bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr &MI,
- bool IsIntrinOpcode,
- unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2) const {
+bool X86InstrInfo::findFMA3CommutedOpIndices(
+ const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2,
+ const X86InstrFMA3Group &FMA3Group) const {
+ unsigned FirstCommutableVecOp = 1;
+ unsigned LastCommutableVecOp = 3;
+ unsigned KMaskOp = 0;
+ if (FMA3Group.isKMasked()) {
+ // The k-mask operand has index = 2 for masked and zero-masked operations.
+ KMaskOp = 2;
+
+ // The operand with index = 1 is used as a source for those elements for
+ // which the corresponding bit in the k-mask is set to 0.
+ if (FMA3Group.isKMergeMasked())
+ FirstCommutableVecOp = 3;
+
+ LastCommutableVecOp++;
+ }
- unsigned RegOpsNum = isMem(MI, 3) ? 2 : 3;
+ if (isMem(MI, LastCommutableVecOp))
+ LastCommutableVecOp--;
// Only the first RegOpsNum operands are commutable.
// Also, the value 'CommuteAnyOperandIndex' is valid here as it means
// that the operand is not specified/fixed.
if (SrcOpIdx1 != CommuteAnyOperandIndex &&
- (SrcOpIdx1 < 1 || SrcOpIdx1 > RegOpsNum))
+ (SrcOpIdx1 < FirstCommutableVecOp || SrcOpIdx1 > LastCommutableVecOp ||
+ SrcOpIdx1 == KMaskOp))
return false;
if (SrcOpIdx2 != CommuteAnyOperandIndex &&
- (SrcOpIdx2 < 1 || SrcOpIdx2 > RegOpsNum))
+ (SrcOpIdx2 < FirstCommutableVecOp || SrcOpIdx2 > LastCommutableVecOp ||
+ SrcOpIdx2 == KMaskOp))
return false;
// Look for two different register operands assumed to be commutable
@@ -3898,7 +3464,7 @@ bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr &MI,
if (SrcOpIdx1 == SrcOpIdx2)
// Both of operands are not fixed. By default set one of commutable
// operands to the last register operand of the instruction.
- CommutableOpIdx2 = RegOpsNum;
+ CommutableOpIdx2 = LastCommutableVecOp;
else if (SrcOpIdx2 == CommuteAnyOperandIndex)
// Only one of operands is not fixed.
CommutableOpIdx2 = SrcOpIdx1;
@@ -3906,7 +3472,12 @@ bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr &MI,
// CommutableOpIdx2 is well defined now. Let's choose another commutable
// operand and assign its index to CommutableOpIdx1.
unsigned Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();
- for (CommutableOpIdx1 = RegOpsNum; CommutableOpIdx1 > 0; CommutableOpIdx1--) {
+ for (CommutableOpIdx1 = LastCommutableVecOp;
+ CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) {
+ // Just ignore and skip the k-mask operand.
+ if (CommutableOpIdx1 == KMaskOp)
+ continue;
+
// The commuted operands must have different registers.
// Otherwise, the commute transformation does not change anything and
// is useless then.
@@ -3915,7 +3486,7 @@ bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr &MI,
}
// No appropriate commutable operands were found.
- if (CommutableOpIdx1 == 0)
+ if (CommutableOpIdx1 < FirstCommutableVecOp)
return false;
// Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpidx2
@@ -3927,8 +3498,7 @@ bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr &MI,
// Check if we can adjust the opcode to preserve the semantics when
// commute the register operands.
- return getFMA3OpcodeToCommuteOperands(MI.getOpcode(), IsIntrinOpcode,
- SrcOpIdx1, SrcOpIdx2) != 0;
+ return getFMA3OpcodeToCommuteOperands(MI, SrcOpIdx1, SrcOpIdx2, FMA3Group) != 0;
}
bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
@@ -3955,10 +3525,10 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
return false;
}
default:
- bool IsIntrinOpcode;
- if (isFMA3(MI.getOpcode(), IsIntrinOpcode))
- return findFMA3CommutedOpIndices(MI, IsIntrinOpcode,
- SrcOpIdx1, SrcOpIdx2);
+ const X86InstrFMA3Group *FMA3Group =
+ X86InstrFMA3Info::getFMA3Group(MI.getOpcode());
+ if (FMA3Group)
+ return findFMA3CommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2, *FMA3Group);
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
}
return false;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 7251aecaac2..5c8de0fe281 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_X86_X86INSTRINFO_H
#include "MCTargetDesc/X86BaseInfo.h"
+#include "X86InstrFMA3Info.h"
#include "X86RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -265,7 +266,7 @@ public:
unsigned &SrcOpIdx2) const override;
/// Returns true if the routine could find two commutable operands
- /// in the given FMA instruction. Otherwise, returns false.
+ /// in the given FMA instruction \p MI. Otherwise, returns false.
///
/// \p SrcOpIdx1 and \p SrcOpIdx2 are INPUT and OUTPUT arguments.
/// The output indices of the commuted operands are returned in these
@@ -274,10 +275,12 @@ public:
/// value 'CommuteAnyOperandIndex' which means that the corresponding
/// operand index is not set and this method is free to pick any of
/// available commutable operands.
+ /// The parameter \p FMA3Group keeps the reference to the group of relative
+ /// FMA3 opcodes including register/memory forms of 132/213/231 opcodes.
///
/// For example, calling this method this way:
/// unsigned Idx1 = 1, Idx2 = CommuteAnyOperandIndex;
- /// findFMA3CommutedOpIndices(MI, Idx1, Idx2);
+ /// findFMA3CommutedOpIndices(MI, Idx1, Idx2, FMA3Group);
/// can be interpreted as a query asking if the operand #1 can be swapped
/// with any other available operand (e.g. operand #2, operand #3, etc.).
///
@@ -286,9 +289,30 @@ public:
/// FMA213 #1, #2, #3
/// results into instruction with adjusted opcode:
/// FMA231 #3, #2, #1
- bool findFMA3CommutedOpIndices(MachineInstr &MI, bool IsIntrinOpcode,
+ bool findFMA3CommutedOpIndices(const MachineInstr &MI,
unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2) const;
+ unsigned &SrcOpIdx2,
+ const X86InstrFMA3Group &FMA3Group) const;
+
+ /// Returns an adjusted FMA opcode that must be used in FMA instruction that
+ /// performs the same computations as the given \p MI but which has the
+ /// operands \p SrcOpIdx1 and \p SrcOpIdx2 commuted.
+ /// It may return 0 if it is unsafe to commute the operands.
+ /// Note that a machine instruction (instead of its opcode) is passed as the
+ /// first parameter to make it possible to analyze the instruction's uses and
+ /// commute the first operand of FMA even when it seems unsafe when you look
+ /// at the opcode. For example, it is Ok to commute the first operand of
+ /// VFMADD*SD_Int, if ONLY the lowest 64-bit element of the result is used.
+ ///
+ /// The returned FMA opcode may differ from the opcode in the given \p MI.
+ /// For example, commuting the operands #1 and #3 in the following FMA
+ /// FMA213 #1, #2, #3
+ /// results into instruction with adjusted opcode:
+ /// FMA231 #3, #2, #1
+ unsigned getFMA3OpcodeToCommuteOperands(const MachineInstr &MI,
+ unsigned SrcOpIdx1,
+ unsigned SrcOpIdx2,
+ const X86InstrFMA3Group &FMA3Group) const;
// Branch analysis.
bool isUnpredicatedTerminator(const MachineInstr &MI) const override;
OpenPOWER on IntegriCloud