summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 16
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.h 3
-rw-r--r-- llvm/lib/Target/X86/X86InstrFMA.td 89
-rw-r--r-- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td 6
-rw-r--r-- llvm/lib/Target/X86/X86IntrinsicsInfo.h 2
5 files changed, 61 insertions, 55 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index dc16d06e037..8174215860b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26099,10 +26099,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FNMADDS3_RND: return "X86ISD::FNMADDS3_RND";
case X86ISD::FMSUBS3_RND: return "X86ISD::FMSUBS3_RND";
case X86ISD::FNMSUBS3_RND: return "X86ISD::FNMSUBS3_RND";
- case X86ISD::FMADD4S: return "X86ISD::FMADD4S";
- case X86ISD::FNMADD4S: return "X86ISD::FNMADD4S";
- case X86ISD::FMSUB4S: return "X86ISD::FMSUB4S";
- case X86ISD::FNMSUB4S: return "X86ISD::FNMSUB4S";
case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H";
case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L";
case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE";
@@ -37709,28 +37705,24 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
case X86ISD::FMADDS3: Opcode = X86ISD::FNMADDS3; break;
case X86ISD::FMADDS1_RND: Opcode = X86ISD::FNMADDS1_RND; break;
case X86ISD::FMADDS3_RND: Opcode = X86ISD::FNMADDS3_RND; break;
- case X86ISD::FMADD4S: Opcode = X86ISD::FNMADD4S; break;
case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break;
case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break;
case X86ISD::FMSUBS1: Opcode = X86ISD::FNMSUBS1; break;
case X86ISD::FMSUBS3: Opcode = X86ISD::FNMSUBS3; break;
case X86ISD::FMSUBS1_RND: Opcode = X86ISD::FNMSUBS1_RND; break;
case X86ISD::FMSUBS3_RND: Opcode = X86ISD::FNMSUBS3_RND; break;
- case X86ISD::FMSUB4S: Opcode = X86ISD::FNMSUB4S; break;
case X86ISD::FNMADD: Opcode = ISD::FMA; break;
case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break;
case X86ISD::FNMADDS1: Opcode = X86ISD::FMADDS1; break;
case X86ISD::FNMADDS3: Opcode = X86ISD::FMADDS3; break;
case X86ISD::FNMADDS1_RND: Opcode = X86ISD::FMADDS1_RND; break;
case X86ISD::FNMADDS3_RND: Opcode = X86ISD::FMADDS3_RND; break;
- case X86ISD::FNMADD4S: Opcode = X86ISD::FMADD4S; break;
case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break;
case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break;
case X86ISD::FNMSUBS1: Opcode = X86ISD::FMSUBS1; break;
case X86ISD::FNMSUBS3: Opcode = X86ISD::FMSUBS3; break;
case X86ISD::FNMSUBS1_RND: Opcode = X86ISD::FMSUBS1_RND; break;
case X86ISD::FNMSUBS3_RND: Opcode = X86ISD::FMSUBS3_RND; break;
- case X86ISD::FNMSUB4S: Opcode = X86ISD::FMSUB4S; break;
}
}
@@ -37743,28 +37735,24 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
case X86ISD::FMADDS3: Opcode = X86ISD::FMSUBS3; break;
case X86ISD::FMADDS1_RND: Opcode = X86ISD::FMSUBS1_RND; break;
case X86ISD::FMADDS3_RND: Opcode = X86ISD::FMSUBS3_RND; break;
- case X86ISD::FMADD4S: Opcode = X86ISD::FMSUB4S; break;
case X86ISD::FMSUB: Opcode = ISD::FMA; break;
case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break;
case X86ISD::FMSUBS1: Opcode = X86ISD::FMADDS1; break;
case X86ISD::FMSUBS3: Opcode = X86ISD::FMADDS3; break;
case X86ISD::FMSUBS1_RND: Opcode = X86ISD::FMADDS1_RND; break;
case X86ISD::FMSUBS3_RND: Opcode = X86ISD::FMADDS3_RND; break;
- case X86ISD::FMSUB4S: Opcode = X86ISD::FMADD4S; break;
case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break;
case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break;
case X86ISD::FNMADDS1: Opcode = X86ISD::FNMSUBS1; break;
case X86ISD::FNMADDS3: Opcode = X86ISD::FNMSUBS3; break;
case X86ISD::FNMADDS1_RND: Opcode = X86ISD::FNMSUBS1_RND; break;
case X86ISD::FNMADDS3_RND: Opcode = X86ISD::FNMSUBS3_RND; break;
- case X86ISD::FNMADD4S: Opcode = X86ISD::FNMSUB4S; break;
case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break;
case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break;
case X86ISD::FNMSUBS1: Opcode = X86ISD::FNMADDS1; break;
case X86ISD::FNMSUBS3: Opcode = X86ISD::FNMADDS3; break;
case X86ISD::FNMSUBS1_RND: Opcode = X86ISD::FNMADDS1_RND; break;
case X86ISD::FNMSUBS3_RND: Opcode = X86ISD::FNMADDS3_RND; break;
- case X86ISD::FNMSUB4S: Opcode = X86ISD::FNMADD4S; break;
}
}
@@ -39447,28 +39435,24 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FMADDS3_RND:
case X86ISD::FMADDS1:
case X86ISD::FMADDS3:
- case X86ISD::FMADD4S:
case X86ISD::FMSUB:
case X86ISD::FMSUB_RND:
case X86ISD::FMSUBS1_RND:
case X86ISD::FMSUBS3_RND:
case X86ISD::FMSUBS1:
case X86ISD::FMSUBS3:
- case X86ISD::FMSUB4S:
case X86ISD::FNMADD:
case X86ISD::FNMADD_RND:
case X86ISD::FNMADDS1_RND:
case X86ISD::FNMADDS3_RND:
case X86ISD::FNMADDS1:
case X86ISD::FNMADDS3:
- case X86ISD::FNMADD4S:
case X86ISD::FNMSUB:
case X86ISD::FNMSUB_RND:
case X86ISD::FNMSUBS1_RND:
case X86ISD::FNMSUBS3_RND:
case X86ISD::FNMSUBS1:
case X86ISD::FNMSUBS3:
- case X86ISD::FNMSUB4S:
case ISD::FMA: return combineFMA(N, DAG, Subtarget);
case X86ISD::FMADDSUB_RND:
case X86ISD::FMSUBADD_RND:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 5f73b36d298..d7e33442181 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -501,9 +501,6 @@ namespace llvm {
FMADDSUB_RND,
FMSUBADD_RND,
- // FMA4 specific scalar intrinsics bits that zero the non-scalar bits.
- FMADD4S, FNMADD4S, FMSUB4S, FNMSUB4S,
-
// Scalar intrinsic FMA.
FMADDS1, FMADDS3,
FNMADDS1, FNMADDS3,
diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td
index 50d38d9f89d..f2cf8029172 100644
--- a/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/llvm/lib/Target/X86/X86InstrFMA.td
@@ -372,7 +372,7 @@ multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
(Op RC:$src2,
(EltVT (extractelt (VT VR128:$src1), (iPTR 0))),
RC:$src3))))),
- (!cast<I>(Prefix#"213"#Suffix#"r_Int")
+ (!cast<Instruction>(Prefix#"213"#Suffix#"r_Int")
VR128:$src1, (COPY_TO_REGCLASS RC:$src2, VR128),
(COPY_TO_REGCLASS RC:$src3, VR128))>;
}
@@ -432,36 +432,32 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
}
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
- ValueType VT, ComplexPattern mem_cpat, SDNode OpNode,
- X86FoldableSchedWrite sched> {
-let isCodeGenOnly = 1 in {
+ ValueType VT, X86FoldableSchedWrite sched> {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr_Int : FMA4S_Int<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (VT (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>, VEX_W,
- VEX_LIG, Sched<[sched]>;
+ []>, VEX_W, VEX_LIG, Sched<[sched]>;
+ let mayLoad = 1 in
def rm_Int : FMA4S_Int<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst, (VT (OpNode VR128:$src1, VR128:$src2,
- mem_cpat:$src3)))]>, VEX_W, VEX_LIG,
+ []>, VEX_W, VEX_LIG,
Sched<[sched.Folded, ReadAfterLd, ReadAfterLd]>;
+ let mayLoad = 1 in
def mr_Int : FMA4S_Int<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [(set VR128:$dst,
- (VT (OpNode VR128:$src1, mem_cpat:$src2, VR128:$src3)))]>,
+ []>,
VEX_LIG, Sched<[sched.Folded, ReadAfterLd,
// memop:$src2
ReadDefault, ReadDefault, ReadDefault,
ReadDefault, ReadDefault,
// VR128::$src3
ReadAfterLd]>;
-let hasSideEffects = 0 in
def rr_Int_REV : FMA4S_Int<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -547,20 +543,20 @@ let ExeDomain = SSEPackedSingle in {
// Scalar Instructions
defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32,
SchedWriteFMA.Scl>,
- fma4s_int<0x6A, "vfmaddss", ssmem, v4f32, sse_load_f32,
- X86Fmadd4s, SchedWriteFMA.Scl>;
+ fma4s_int<0x6A, "vfmaddss", ssmem, v4f32,
+ SchedWriteFMA.Scl>;
defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32,
SchedWriteFMA.Scl>,
- fma4s_int<0x6E, "vfmsubss", ssmem, v4f32, sse_load_f32,
- X86Fmsub4s, SchedWriteFMA.Scl>;
+ fma4s_int<0x6E, "vfmsubss", ssmem, v4f32,
+ SchedWriteFMA.Scl>;
defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
X86Fnmadd, loadf32, SchedWriteFMA.Scl>,
- fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32, sse_load_f32,
- X86Fnmadd4s, SchedWriteFMA.Scl>;
+ fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32,
+ SchedWriteFMA.Scl>;
defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
X86Fnmsub, loadf32, SchedWriteFMA.Scl>,
- fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32, sse_load_f32,
- X86Fnmsub4s, SchedWriteFMA.Scl>;
+ fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32,
+ SchedWriteFMA.Scl>;
// Packed Instructions
defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
@@ -580,20 +576,20 @@ let ExeDomain = SSEPackedDouble in {
// Scalar Instructions
defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64,
SchedWriteFMA.Scl>,
- fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64, sse_load_f64,
- X86Fmadd4s, SchedWriteFMA.Scl>;
+ fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64,
+ SchedWriteFMA.Scl>;
defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64,
SchedWriteFMA.Scl>,
- fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64, sse_load_f64,
- X86Fmsub4s, SchedWriteFMA.Scl>;
+ fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64,
+ SchedWriteFMA.Scl>;
defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
X86Fnmadd, loadf64, SchedWriteFMA.Scl>,
- fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64, sse_load_f64,
- X86Fnmadd4s, SchedWriteFMA.Scl>;
+ fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64,
+ SchedWriteFMA.Scl>;
defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
X86Fnmsub, loadf64, SchedWriteFMA.Scl>,
- fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64, sse_load_f64,
- X86Fnmsub4s, SchedWriteFMA.Scl>;
+ fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64,
+ SchedWriteFMA.Scl>;
// Packed Instructions
defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
@@ -609,3 +605,40 @@ let ExeDomain = SSEPackedDouble in {
loadv2f64, loadv4f64, SchedWriteFMA>;
}
+multiclass scalar_fma4_patterns<SDNode Op, string Name,
+ SDNode Move, ValueType VT, ValueType EltVT,
+ RegisterClass RC, PatFrag mem_frag> {
+ let Predicates = [HasFMA4] in {
+ let AddedComplexity = 15 in
+ def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
+ (Op RC:$src1, RC:$src2, RC:$src3))))),
+ (!cast<Instruction>(Name#"rr_Int")
+ (COPY_TO_REGCLASS RC:$src1, VR128),
+ (COPY_TO_REGCLASS RC:$src2, VR128),
+ (COPY_TO_REGCLASS RC:$src3, VR128))>;
+
+ def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
+ (Op RC:$src1, RC:$src2,
+ (mem_frag addr:$src3)))))),
+ (!cast<Instruction>(Name#"rm_Int")
+ (COPY_TO_REGCLASS RC:$src1, VR128),
+ (COPY_TO_REGCLASS RC:$src2, VR128), addr:$src3)>;
+
+ def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
+ (Op RC:$src1, (mem_frag addr:$src2),
+ RC:$src3))))),
+ (!cast<Instruction>(Name#"mr_Int")
+ (COPY_TO_REGCLASS RC:$src1, VR128), addr:$src2,
+ (COPY_TO_REGCLASS RC:$src3, VR128))>;
+ }
+}
+
+defm : scalar_fma4_patterns<X86Fmadd, "VFMADDSS4", X86Movss, v4f32, f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSS4", X86Movss, v4f32, f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSS4", X86Movss, v4f32, f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSS4", X86Movss, v4f32, f32, FR32, loadf32>;
+
+defm : scalar_fma4_patterns<X86Fmadd, "VFMADDSD4", X86Movsd, v2f64, f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSD4", X86Movsd, v2f64, f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSD4", X86Movsd, v2f64, f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSD4", X86Movsd, v2f64, f64, FR64, loadf64>;
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 824ad7191ec..cab47881bb4 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -483,12 +483,6 @@ def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound, [SDNPCommutat
def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound, [SDNPCommutative]>;
def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound, [SDNPCommutative]>;
-// Scalar FMA4 intrinsics which zero the non-scalar bits.
-def X86Fmadd4s : SDNode<"X86ISD::FMADD4S", SDTFPTernaryOp, [SDNPCommutative]>;
-def X86Fnmadd4s : SDNode<"X86ISD::FNMADD4S", SDTFPTernaryOp, [SDNPCommutative]>;
-def X86Fmsub4s : SDNode<"X86ISD::FMSUB4S", SDTFPTernaryOp, [SDNPCommutative]>;
-def X86Fnmsub4s : SDNode<"X86ISD::FNMSUB4S", SDTFPTernaryOp, [SDNPCommutative]>;
-
// Scalar FMA intrinsics with passthru bits in operand 1.
def X86Fmadds1 : SDNode<"X86ISD::FMADDS1", SDTFPTernaryOp>;
def X86Fnmadds1 : SDNode<"X86ISD::FNMADDS1", SDTFPTernaryOp>;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 0442759d584..9f712d848e2 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -1157,8 +1157,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_vpshrd_w_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0),
X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0),
- X86_INTRINSIC_DATA(fma4_vfmadd_sd, INTR_TYPE_3OP, X86ISD::FMADD4S, 0),
- X86_INTRINSIC_DATA(fma4_vfmadd_ss, INTR_TYPE_3OP, X86ISD::FMADD4S, 0),
X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0),
X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ),
X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE),
OpenPOWER on IntegriCloud