summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew V. Tischenko <andrew.v.tischenko@gmail.com>2017-04-14 07:44:23 +0000
committerAndrew V. Tischenko <andrew.v.tischenko@gmail.com>2017-04-14 07:44:23 +0000
commit75745d0c3e612b54af6b1cfb62be69356ad155a2 (patch)
treea3f1747fb0a43a563d84555b421355b9ea80059d
parent334f8fbe4766f507629003a42caa3f5f143b1e90 (diff)
downloadbcm5719-llvm-75745d0c3e612b54af6b1cfb62be69356ad155a2.tar.gz
bcm5719-llvm-75745d0c3e612b54af6b1cfb62be69356ad155a2.zip
This patch closes PR#32216: Better testing of schedule model instruction latencies/throughputs.
The details are here: https://reviews.llvm.org/D30941 llvm-svn: 300311
-rw-r--r--llvm/include/llvm/CodeGen/AsmPrinter.h3
-rw-r--r--llvm/include/llvm/CodeGen/TargetSchedule.h4
-rw-r--r--llvm/include/llvm/MC/MCObjectStreamer.h3
-rw-r--r--llvm/include/llvm/MC/MCStreamer.h4
-rw-r--r--llvm/include/llvm/MC/MCSubtargetInfo.h11
-rw-r--r--llvm/include/llvm/Target/TargetSubtargetInfo.h8
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp46
-rw-r--r--llvm/lib/CodeGen/TargetSchedule.cpp71
-rw-r--r--llvm/lib/CodeGen/TargetSubtargetInfo.cpp46
-rw-r--r--llvm/lib/MC/MCAsmStreamer.cpp34
-rw-r--r--llvm/lib/MC/MCObjectStreamer.cpp2
-rw-r--r--llvm/lib/MC/MCStreamer.cpp4
-rw-r--r--llvm/lib/Object/RecordStreamer.cpp2
-rw-r--r--llvm/lib/Object/RecordStreamer.h3
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp4
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h3
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp2
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h3
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp4
-rw-r--r--llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp2
-rw-r--r--llvm/lib/Target/X86/X86MCInstLower.cpp18
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.h3
-rw-r--r--llvm/test/CodeGen/X86/recip-fastmath.ll412
-rw-r--r--llvm/test/CodeGen/X86/recip-fastmath2.ll814
26 files changed, 874 insertions, 638 deletions
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 772043fa3ce..fb8c8408fc7 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -112,6 +112,9 @@ public:
typedef std::pair<const GlobalVariable *, unsigned> GOTEquivUsePair;
MapVector<const MCSymbol *, GOTEquivUsePair> GlobalGOTEquivs;
+ /// Enable printing of [latency:throughput] in the output.
+ bool EnablePrintSchedInfo = false;
+
private:
MCSymbol *CurrentFnBegin = nullptr;
MCSymbol *CurrentFnEnd = nullptr;
diff --git a/llvm/include/llvm/CodeGen/TargetSchedule.h b/llvm/include/llvm/CodeGen/TargetSchedule.h
index 0c5a84e0e3b..1992412120a 100644
--- a/llvm/include/llvm/CodeGen/TargetSchedule.h
+++ b/llvm/include/llvm/CodeGen/TargetSchedule.h
@@ -189,6 +189,10 @@ public:
/// This is typically one cycle.
unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *DepMI) const;
+
+ /// \brief Compute the reciprocal throughput of the given instruction.
+ Optional<double> computeInstrRThroughput(const MachineInstr *MI) const;
+ Optional<double> computeInstrRThroughput(unsigned Opcode) const;
};
} // end namespace llvm
diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h
index 11f8dfa2448..7c1189e46ab 100644
--- a/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -98,7 +98,8 @@ public:
void EmitSLEB128Value(const MCExpr *Value) override;
void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override;
void ChangeSection(MCSection *Section, const MCExpr *Subsection) override;
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo& STI) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool = false) override;
/// \brief Emit an instruction to a special fragment, because this instruction
/// can change its size during relaxation.
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index c0d322e3ed3..e466b368ed3 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -836,7 +836,9 @@ public:
}
/// \brief Emit the given \p Instruction into the current section.
- virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);
+ /// If PrintSchedInfo is true, a scheduling comment should be added to the output.
+ virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool PrintSchedInfo = false);
/// \brief Set the bundle alignment mode from now on in the section.
/// The argument is the power of 2 to which the alignment is set. The
diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h
index bbdac8fad5f..d8fb2dc8dcd 100644
--- a/llvm/include/llvm/MC/MCSubtargetInfo.h
+++ b/llvm/include/llvm/MC/MCSubtargetInfo.h
@@ -26,6 +26,8 @@
#include <string>
namespace llvm {
+class MachineInstr;
+class MCInst;
//===----------------------------------------------------------------------===//
///
@@ -167,6 +169,15 @@ public:
auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU);
return Found != ProcDesc.end() && StringRef(Found->Key) == CPU;
}
+
+ /// Returns string representation of scheduler comment
+ virtual std::string getSchedInfoStr(const MachineInstr &MI) const {
+ return std::string();
+ }
+
+ virtual std::string getSchedInfoStr(MCInst const &MCI) const {
+ return std::string();
+ }
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Target/TargetSubtargetInfo.h b/llvm/include/llvm/Target/TargetSubtargetInfo.h
index 0b435159602..83950a9cd02 100644
--- a/llvm/include/llvm/Target/TargetSubtargetInfo.h
+++ b/llvm/include/llvm/Target/TargetSubtargetInfo.h
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/PBQPRAConstraint.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CodeGen.h"
#include <memory>
@@ -143,6 +144,9 @@ public:
/// TargetLowering preference). It does not yet disable the postRA scheduler.
virtual bool enableMachineScheduler() const;
+ /// \brief Support printing of [latency:throughput] comment in output .S file.
+ virtual bool supportPrintSchedInfo() const { return false; }
+
/// \brief True if the machine scheduler should disable the TLI preference
/// for preRA scheduling with the source level scheduler.
virtual bool enableMachineSchedDefaultSched() const { return true; }
@@ -227,6 +231,10 @@ public:
/// Please use MachineRegisterInfo::subRegLivenessEnabled() instead where
/// possible.
virtual bool enableSubRegLiveness() const { return false; }
+
+ /// Returns string representation of scheduler comment
+ std::string getSchedInfoStr(const MachineInstr &MI) const override;
+ std::string getSchedInfoStr(MCInst const &MCI) const override;
};
} // end namespace llvm
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 834a59a12a6..6c18d56b827 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -123,6 +123,10 @@ static const char *const CodeViewLineTablesGroupDescription =
STATISTIC(EmittedInsts, "Number of machine instrs printed");
+static cl::opt<bool>
+ PrintSchedule("print-schedule", cl::Hidden, cl::init(false),
+ cl::desc("Print 'sched: [latency:throughput]' in .s output"));
+
char AsmPrinter::ID = 0;
typedef DenseMap<GCStrategy*, std::unique_ptr<GCMetadataPrinter>> gcp_map_type;
@@ -720,7 +724,8 @@ void AsmPrinter::EmitFunctionEntryLabel() {
}
/// emitComments - Pretty-print comments for instructions.
-static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
+ AsmPrinter *AP) {
const MachineFunction *MF = MI.getParent()->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
@@ -728,6 +733,7 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
int FI;
const MachineFrameInfo &MFI = MF->getFrameInfo();
+ bool Commented = false;
// We assume a single instruction only has a spill or reload, not
// both.
@@ -735,24 +741,39 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
- CommentOS << MMO->getSize() << "-byte Reload\n";
+ CommentOS << MMO->getSize() << "-byte Reload";
+ Commented = true;
}
} else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI))
- CommentOS << MMO->getSize() << "-byte Folded Reload\n";
+ if (MFI.isSpillSlotObjectIndex(FI)) {
+ CommentOS << MMO->getSize() << "-byte Folded Reload";
+ Commented = true;
+ }
} else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
- CommentOS << MMO->getSize() << "-byte Spill\n";
+ CommentOS << MMO->getSize() << "-byte Spill";
+ Commented = true;
}
} else if (TII->hasStoreToStackSlot(MI, MMO, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI))
- CommentOS << MMO->getSize() << "-byte Folded Spill\n";
+ if (MFI.isSpillSlotObjectIndex(FI)) {
+ CommentOS << MMO->getSize() << "-byte Folded Spill";
+ Commented = true;
+ }
}
// Check for spill-induced copies
- if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
- CommentOS << " Reload Reuse\n";
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) {
+ Commented = true;
+ CommentOS << " Reload Reuse";
+ }
+
+ if (Commented && AP->EnablePrintSchedInfo)
+ // If any comment was added above and we need sched info comment then
+ // add this new comment just after the above comment w/o "\n" between them.
+ CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n";
+ else if (Commented)
+ CommentOS << "\n";
}
/// emitImplicitDef - This method emits the specified machine instruction
@@ -966,7 +987,7 @@ void AsmPrinter::EmitFunctionBody() {
}
if (isVerbose())
- emitComments(MI, OutStreamer->GetCommentOS());
+ emitComments(MI, OutStreamer->GetCommentOS(), this);
switch (MI.getOpcode()) {
case TargetOpcode::CFI_INSTRUCTION:
@@ -1383,6 +1404,11 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
if (isVerbose())
LI = &getAnalysis<MachineLoopInfo>();
+
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ EnablePrintSchedInfo = PrintSchedule.getNumOccurrences()
+ ? PrintSchedule
+ : STI.supportPrintSchedInfo();
}
namespace {
diff --git a/llvm/lib/CodeGen/TargetSchedule.cpp b/llvm/lib/CodeGen/TargetSchedule.cpp
index 04edf0e6285..0df34ce4311 100644
--- a/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -277,7 +277,11 @@ unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const {
if (SCDesc->isValid() && !SCDesc->isVariant())
return computeInstrLatency(*SCDesc);
- llvm_unreachable("No MI sched latency");
+ if (SCDesc->isValid()) {
+ assert (!SCDesc->isVariant() && "No MI sched latency: SCDesc->isVariant()");
+ return computeInstrLatency(*SCDesc);
+ }
+ return 0;
}
unsigned
@@ -331,3 +335,68 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
}
return 0;
}
+
+static Optional<double>
+getRTroughputFromItineraries(unsigned schedClass,
+ const InstrItineraryData *IID){
+ double Unknown = std::numeric_limits<double>::infinity();
+ double Throughput = Unknown;
+
+ for (const InstrStage *IS = IID->beginStage(schedClass),
+ *E = IID->endStage(schedClass);
+ IS != E; ++IS) {
+ unsigned Cycles = IS->getCycles();
+ if (!Cycles)
+ continue;
+ Throughput =
+ std::min(Throughput, countPopulation(IS->getUnits()) * 1.0 / Cycles);
+ }
+ // We need the reciprocal throughput, so return the inverse of the throughput.
+ return 1 / Throughput;
+}
+
+static Optional<double>
+getRTroughputFromInstrSchedModel(const MCSchedClassDesc *SCDesc,
+ const TargetSubtargetInfo *STI,
+ const MCSchedModel &SchedModel) {
+ double Unknown = std::numeric_limits<double>::infinity();
+ double Throughput = Unknown;
+
+ for (const MCWriteProcResEntry *WPR = STI->getWriteProcResBegin(SCDesc),
+ *WEnd = STI->getWriteProcResEnd(SCDesc);
+ WPR != WEnd; ++WPR) {
+ unsigned Cycles = WPR->Cycles;
+ if (!Cycles)
+ return Optional<double>();
+
+ unsigned NumUnits =
+ SchedModel.getProcResource(WPR->ProcResourceIdx)->NumUnits;
+ Throughput = std::min(Throughput, NumUnits * 1.0 / Cycles);
+ }
+ // We need the reciprocal throughput, so return the inverse of the throughput.
+ return 1 / Throughput;
+}
+
+Optional<double>
+TargetSchedModel::computeInstrRThroughput(const MachineInstr *MI) const {
+ if (hasInstrItineraries())
+ return getRTroughputFromItineraries(MI->getDesc().getSchedClass(),
+ getInstrItineraries());
+ if (hasInstrSchedModel())
+ return getRTroughputFromInstrSchedModel(resolveSchedClass(MI), STI,
+ SchedModel);
+ return Optional<double>();
+}
+
+Optional<double>
+TargetSchedModel::computeInstrRThroughput(unsigned Opcode) const {
+ unsigned SchedClass = TII->get(Opcode).getSchedClass();
+ if (hasInstrItineraries())
+ return getRTroughputFromItineraries(SchedClass, getInstrItineraries());
+ if (hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass);
+ if (SCDesc->isValid() && !SCDesc->isVariant())
+ return getRTroughputFromInstrSchedModel(SCDesc, STI, SchedModel);
+ }
+ return Optional<double>();
+}
diff --git a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
index c74707d95b9..0a444e0fff0 100644
--- a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -11,6 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -52,3 +55,46 @@ bool TargetSubtargetInfo::enablePostRAScheduler() const {
bool TargetSubtargetInfo::useAA() const {
return false;
}
+
+static std::string createSchedInfoStr(unsigned Latency,
+ Optional<double> RThroughput) {
+ static const char *SchedPrefix = " sched: [";
+ std::string Comment;
+ raw_string_ostream CS(Comment);
+ if (Latency > 0 && RThroughput.hasValue())
+ CS << SchedPrefix << Latency << format(":%2.2f", RThroughput.getValue())
+ << "]";
+ else if (Latency > 0)
+ CS << SchedPrefix << Latency << ":?]";
+ else if (RThroughput.hasValue())
+ CS << SchedPrefix << "?:" << RThroughput.getValue() << "]";
+ CS.flush();
+ return Comment;
+}
+
+/// Returns string representation of scheduler comment
+std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const {
+ if (MI.isPseudo() || MI.isTerminator())
+ return std::string();
+ // We don't cache TSchedModel because it depends on TargetInstrInfo
+ // that could be changed during the compilation
+ TargetSchedModel TSchedModel;
+ TSchedModel.init(getSchedModel(), this, getInstrInfo());
+ unsigned Latency = TSchedModel.computeInstrLatency(&MI);
+ Optional<double> RThroughput = TSchedModel.computeInstrRThroughput(&MI);
+ return createSchedInfoStr(Latency, RThroughput);
+}
+
+/// Returns string representation of scheduler comment
+std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const {
+ // We don't cache TSchedModel because it depends on TargetInstrInfo
+ // that could be changed during the compilation
+ TargetSchedModel TSchedModel;
+ TSchedModel.init(getSchedModel(), this, getInstrInfo());
+ if (!TSchedModel.hasInstrSchedModel())
+ return std::string();
+ unsigned Latency = TSchedModel.computeInstrLatency(MCI.getOpcode());
+ Optional<double> RThroughput =
+ TSchedModel.computeInstrRThroughput(MCI.getOpcode());
+ return createSchedInfoStr(Latency, RThroughput);
+}
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 92dcf535ec9..9e5553fa8d4 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -103,7 +103,10 @@ public:
void AddComment(const Twine &T, bool EOL = true) override;
/// AddEncodingComment - Add a comment showing the encoding of an instruction.
- void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &);
+ /// If PrintSchedInfo is true then the comment sched:[x:y] should
+ /// be added to the output if it is supported by the target.
+ void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &,
+ bool PrintSchedInfo);
/// GetCommentOS - Return a raw_ostream that comments can be written to.
/// Unlike AddComment, you are required to terminate comments with \n if you
@@ -278,7 +281,8 @@ public:
void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except) override;
void EmitWinEHHandlerData() override;
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool PrintSchedInfo) override;
void EmitBundleAlignMode(unsigned AlignPow2) override;
void EmitBundleLock(bool AlignToEnd) override;
@@ -1504,7 +1508,8 @@ void MCAsmStreamer::EmitWinCFIEndProlog() {
}
void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
- const MCSubtargetInfo &STI) {
+ const MCSubtargetInfo &STI,
+ bool PrintSchedInfo) {
raw_ostream &OS = GetCommentOS();
SmallString<256> Code;
SmallVector<MCFixup, 4> Fixups;
@@ -1577,7 +1582,11 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
}
}
}
- OS << "]\n";
+ OS << "]";
+ // If we are not going to add fixup or scheduling comments after this point
+ // then we have to end the current comment line with "\n".
+ if (Fixups.size() || !PrintSchedInfo)
+ OS << "\n";
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
MCFixup &F = Fixups[i];
@@ -1588,16 +1597,19 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
}
void MCAsmStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) {
+ const MCSubtargetInfo &STI,
+ bool PrintSchedInfo) {
assert(getCurrentSectionOnly() &&
"Cannot emit contents before setting section!");
// Show the encoding in a comment if we have a code emitter.
if (Emitter)
- AddEncodingComment(Inst, STI);
+ AddEncodingComment(Inst, STI, PrintSchedInfo);
// Show the MCInst if enabled.
if (ShowInst) {
+ if (PrintSchedInfo)
+ GetCommentOS() << "\n";
Inst.dump_pretty(GetCommentOS(), InstPrinter.get(), "\n ");
GetCommentOS() << "\n";
}
@@ -1607,6 +1619,16 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst,
else
InstPrinter->printInst(&Inst, OS, "", STI);
+ if (PrintSchedInfo) {
+ std::string SI = STI.getSchedInfoStr(Inst);
+ if (!SI.empty())
+ GetCommentOS() << SI;
+ }
+
+ StringRef Comments = CommentToEmit;
+ if (Comments.size() && Comments.back() != '\n')
+ GetCommentOS() << "\n";
+
EmitEOL();
}
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index 726326be2ee..f7f2253256e 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -238,7 +238,7 @@ bool MCObjectStreamer::mayHaveInstructions(MCSection &Sec) const {
}
void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) {
+ const MCSubtargetInfo &STI, bool) {
MCStreamer::EmitInstruction(Inst, STI);
MCSection *Sec = getCurrentSectionOnly();
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index b9c01c66f31..c9a6f12b6a5 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -777,8 +777,8 @@ void MCStreamer::visitUsedExpr(const MCExpr &Expr) {
}
}
-void MCStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) {
+void MCStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) {
// Scan for values.
for (unsigned i = Inst.getNumOperands(); i--;)
if (Inst.getOperand(i).isExpr())
diff --git a/llvm/lib/Object/RecordStreamer.cpp b/llvm/lib/Object/RecordStreamer.cpp
index a5018443b87..c9c27451f80 100644
--- a/llvm/lib/Object/RecordStreamer.cpp
+++ b/llvm/lib/Object/RecordStreamer.cpp
@@ -78,7 +78,7 @@ RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); }
RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {}
void RecordStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) {
+ const MCSubtargetInfo &STI, bool) {
MCStreamer::EmitInstruction(Inst, STI);
}
diff --git a/llvm/lib/Object/RecordStreamer.h b/llvm/lib/Object/RecordStreamer.h
index c3bd5b09a9b..a845ecd786a 100644
--- a/llvm/lib/Object/RecordStreamer.h
+++ b/llvm/lib/Object/RecordStreamer.h
@@ -34,7 +34,8 @@ public:
const_iterator begin();
const_iterator end();
RecordStreamer(MCContext &Context);
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) override;
void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 5903e1e36d4..271263507ae 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -102,8 +102,8 @@ public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
- void EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) override {
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) override {
EmitA64MappingSymbol();
MCELFStreamer::EmitInstruction(Inst, STI);
}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 774a0b3771b..6fa890ba1cd 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -477,8 +477,8 @@ public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
- void EmitInstruction(const MCInst& Inst,
- const MCSubtargetInfo &STI) override {
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) override {
if (IsThumb)
EmitThumbMappingSymbol();
else
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index 09819ccedd8..9e1ff9ca35d 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -44,7 +44,7 @@ static cl::opt<unsigned> GPSize
cl::init(8));
void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB,
- const MCSubtargetInfo &STI) {
+ const MCSubtargetInfo &STI, bool) {
assert(MCB.getOpcode() == Hexagon::BUNDLE);
assert(HexagonMCInstrInfo::bundleSize(MCB) <= HEXAGON_PACKET_SIZE);
assert(HexagonMCInstrInfo::bundleSize(MCB) > 0);
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
index 5cb84a48a31..024dff1a2f9 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
@@ -34,7 +34,8 @@ public:
MCELFStreamer(Context, TAB, OS, Emitter),
MCII (createHexagonMCInstrInfo()) {}
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) override;
void EmitSymbol(const MCInst &Inst);
void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment,
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index 4eeccc3995f..ae327832231 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -20,7 +20,7 @@
using namespace llvm;
void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) {
+ const MCSubtargetInfo &STI, bool) {
MCELFStreamer::EmitInstruction(Inst, STI);
MCContext &Context = getContext();
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index 72cde1c9084..f5eda112817 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -45,7 +45,8 @@ public:
/// \p Inst is actually emitted. For example, we can inspect the operands and
/// gather sufficient information that allows us to reason about the register
/// usage for the translation unit.
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool = false) override;
/// Overriding this function allows us to record all labels that should be
/// marked as microMIPS. Based on this data marking is done in
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index 8c2617a687b..9266f0e216d 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -139,8 +139,8 @@ private:
public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to mask dangerous instructions.
- void EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) override {
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ bool) override {
// Sandbox indirect jumps.
if (isIndirectJump(Inst)) {
if (PendingCall)
diff --git a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 8594addb5dd..6e062ec5934 100644
--- a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -1189,8 +1189,6 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
OS << ']';
--i; // For loop increments element #.
}
- //MI->print(OS, 0);
- OS << "\n";
// We successfully added a comment to this instruction.
return true;
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 55b090b6764..550e3543a71 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -102,7 +102,7 @@ void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
}
void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
- OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
+ OutStreamer->EmitInstruction(Inst, getSubtargetInfo(), EnablePrintSchedInfo);
SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
}
@@ -1529,7 +1529,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 64> Mask;
DecodePSHUFBMask(C, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
+ !EnablePrintSchedInfo);
}
break;
}
@@ -1600,7 +1601,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPERMILPMask(C, ElSize, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
+ !EnablePrintSchedInfo);
}
break;
}
@@ -1630,7 +1632,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
+ !EnablePrintSchedInfo);
}
break;
}
@@ -1646,7 +1649,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPPERMMask(C, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
+ !EnablePrintSchedInfo);
}
break;
}
@@ -1706,7 +1710,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
CS << "?";
}
CS << "]";
- OutStreamer->AddComment(CS.str());
+ OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
} else if (auto *CV = dyn_cast<ConstantVector>(C)) {
CS << "<";
for (int i = 0, NumOperands = CV->getNumOperands(); i < NumOperands; ++i) {
@@ -1738,7 +1742,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
CS << ">";
- OutStreamer->AddComment(CS.str());
+ OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
}
}
break;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index c2c95658482..d0d88d32694 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -624,6 +624,9 @@ public:
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
+ // TODO: Update the regression tests and return true.
+ bool supportPrintSchedInfo() const override { return false; }
+
bool enableEarlyIfConversion() const override;
/// Return the instruction itineraries based on the subtarget selection.
diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll
index 5fd553b301a..16e261bf3c5 100644
--- a/llvm/test/CodeGen/X86/recip-fastmath.ll
+++ b/llvm/test/CodeGen/X86/recip-fastmath.ll
@@ -2,12 +2,12 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge| FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL-NO-FMA
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX
; If the target's divss/divps instructions are substantially
; slower than rcpss/rcpps with a Newton-Raphson refinement,
@@ -25,11 +25,47 @@ define float @f32_no_estimate(float %x) #0 {
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-LABEL: f32_no_estimate:
-; AVX: # BB#0:
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; AVX-NEXT: retq
+; AVX-RECIP-LABEL: f32_no_estimate:
+; AVX-RECIP: # BB#0:
+; AVX-RECIP-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-RECIP-NEXT: vdivss %xmm0, %xmm1, %xmm0
+; AVX-RECIP-NEXT: retq
+;
+; FMA-RECIP-LABEL: f32_no_estimate:
+; FMA-RECIP: # BB#0:
+; FMA-RECIP-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; FMA-RECIP-NEXT: vdivss %xmm0, %xmm1, %xmm0
+; FMA-RECIP-NEXT: retq
+;
+; BTVER2-LABEL: f32_no_estimate:
+; BTVER2: # BB#0:
+; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [19:19.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
+;
+; SANDY-LABEL: f32_no_estimate:
+; SANDY: # BB#0:
+; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
+;
+; HASWELL-LABEL: f32_no_estimate:
+; HASWELL: # BB#0:
+; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
+;
+; HASWELL-NO-FMA-LABEL: f32_no_estimate:
+; HASWELL-NO-FMA: # BB#0:
+; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HASWELL-NO-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: f32_no_estimate:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50]
+; AVX512-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
+; AVX512-NEXT: retq # sched: [1:1.00]
%div = fdiv fast float 1.0, %x
ret float %div
}
@@ -65,30 +101,30 @@ define float @f32_one_step(float %x) #1 {
;
; BTVER2-LABEL: f32_one_step:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: f32_one_step:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: f32_one_step:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: f32_one_step:
; HASWELL-NO-FMA: # BB#0:
@@ -105,7 +141,7 @@ define float @f32_one_step(float %x) #1 {
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
-; AVX512-NEXT: retq
+; AVX512-NEXT: retq # sched: [1:1.00]
%div = fdiv fast float 1.0, %x
ret float %div
}
@@ -155,42 +191,42 @@ define float @f32_two_step(float %x) #2 {
;
; BTVER2-LABEL: f32_two_step:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2
-; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2
-; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2
-; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
+; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
+; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [2:1.00]
+; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: f32_two_step:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2
-; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2
-; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2
-; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
+; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
+; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
+; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: f32_two_step:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
-; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; HASWELL-NEXT: vmovaps %xmm1, %xmm3
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0
; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: f32_two_step:
; HASWELL-NO-FMA: # BB#0:
@@ -209,13 +245,13 @@ define float @f32_two_step(float %x) #2 {
; AVX512-LABEL: f32_two_step:
; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX512-NEXT: vmovaps %xmm1, %xmm3
+; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0
; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0
-; AVX512-NEXT: retq
+; AVX512-NEXT: retq # sched: [1:1.00]
%div = fdiv fast float 1.0, %x
ret float %div
}
@@ -242,21 +278,21 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
;
; BTVER2-LABEL: v4f32_no_estimate:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; BTVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
+; BTVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [19:19.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v4f32_no_estimate:
; SANDY: # BB#0:
-; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v4f32_no_estimate:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
-; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50]
+; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v4f32_no_estimate:
; HASWELL-NO-FMA: # BB#0:
@@ -266,9 +302,9 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
;
; AVX512-LABEL: v4f32_no_estimate:
; AVX512: # BB#0:
-; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
-; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: retq
+; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50]
+; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00]
+; AVX512-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
}
@@ -304,31 +340,31 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
;
; BTVER2-LABEL: v4f32_one_step:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; BTVER2-NEXT: vrcpps %xmm0, %xmm1
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
+; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v4f32_one_step:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v4f32_one_step:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpps %xmm0, %xmm1
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
+; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v4f32_one_step:
; HASWELL-NO-FMA: # BB#0:
@@ -342,18 +378,18 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
;
; KNL-LABEL: v4f32_one_step:
; KNL: # BB#0:
-; KNL-NEXT: vrcpps %xmm0, %xmm1
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
+; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
-; KNL-NEXT: retq
+; KNL-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: v4f32_one_step:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
-; SKX-NEXT: retq
+; SKX-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
}
@@ -403,42 +439,42 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
;
; BTVER2-LABEL: v4f32_two_step:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; BTVER2-NEXT: vrcpps %xmm0, %xmm1
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2
-; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2
-; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2
-; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
+; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
+; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [2:1.00]
+; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v4f32_two_step:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2
-; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2
-; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2
-; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
+; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
+; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v4f32_two_step:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpps %xmm0, %xmm1
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
-; HASWELL-NEXT: vmovaps %xmm1, %xmm3
+; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v4f32_two_step:
; HASWELL-NO-FMA: # BB#0:
@@ -456,25 +492,25 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
;
; KNL-LABEL: v4f32_two_step:
; KNL: # BB#0:
-; KNL-NEXT: vrcpps %xmm0, %xmm1
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
-; KNL-NEXT: vmovaps %xmm1, %xmm3
+; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
-; KNL-NEXT: retq
+; KNL-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: v4f32_two_step:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
-; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
-; SKX-NEXT: vmovaps %xmm1, %xmm3
+; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
-; SKX-NEXT: retq
+; SKX-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
}
@@ -504,21 +540,21 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
;
; BTVER2-LABEL: v8f32_no_estimate:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; BTVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
+; BTVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:19.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v8f32_no_estimate:
; SANDY: # BB#0:
-; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [12:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v8f32_no_estimate:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
-; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00]
+; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_no_estimate:
; HASWELL-NO-FMA: # BB#0:
@@ -528,9 +564,9 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
;
; AVX512-LABEL: v8f32_no_estimate:
; AVX512: # BB#0:
-; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
-; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0
-; AVX512-NEXT: retq
+; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00]
+; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00]
+; AVX512-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
@@ -573,31 +609,31 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
;
; BTVER2-LABEL: v8f32_one_step:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:1.00]
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
+; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v8f32_one_step:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm1
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v8f32_one_step:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm1
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
+; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_one_step:
; HASWELL-NO-FMA: # BB#0:
@@ -611,18 +647,18 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
;
; KNL-LABEL: v8f32_one_step:
; KNL: # BB#0:
-; KNL-NEXT: vrcpps %ymm0, %ymm1
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
+; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
-; KNL-NEXT: retq
+; KNL-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: v8f32_one_step:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
-; SKX-NEXT: retq
+; SKX-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
@@ -685,42 +721,42 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
;
; BTVER2-LABEL: v8f32_two_step:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2
-; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2
-; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:1.00]
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:1.00]
+; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [2:1.00]
+; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
+; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v8f32_two_step:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm1
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2
-; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
+; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v8f32_two_step:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm1
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
-; HASWELL-NEXT: vmovaps %ymm1, %ymm3
+; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_two_step:
; HASWELL-NO-FMA: # BB#0:
@@ -738,25 +774,25 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
;
; KNL-LABEL: v8f32_two_step:
; KNL: # BB#0:
-; KNL-NEXT: vrcpps %ymm0, %ymm1
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
-; KNL-NEXT: vmovaps %ymm1, %ymm3
+; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
-; KNL-NEXT: retq
+; KNL-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: v8f32_two_step:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
-; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
-; SKX-NEXT: vmovaps %ymm1, %ymm3
+; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
-; SKX-NEXT: retq
+; SKX-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
diff --git a/llvm/test/CodeGen/X86/recip-fastmath2.ll b/llvm/test/CodeGen/X86/recip-fastmath2.ll
index 730d2f13038..440a6f0bef1 100644
--- a/llvm/test/CodeGen/X86/recip-fastmath2.ll
+++ b/llvm/test/CodeGen/X86/recip-fastmath2.ll
@@ -1,13 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge| FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL-NO-FMA
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL-NO-FMA
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX
; It's the extra tests coverage for recip as discussed on D26855.
@@ -32,33 +32,33 @@ define float @f32_no_step_2(float %x) #3 {
;
; BTVER2-LABEL: f32_no_step_2:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0
-; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: f32_no_step_2:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0
-; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: f32_no_step_2:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0
-; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: f32_no_step_2:
; HASWELL-NO-FMA: # BB#0:
-; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm0
-; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; HASWELL-NO-FMA-NEXT: retq
+; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
;
; AVX512-LABEL: f32_no_step_2:
; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; AVX512-NEXT: retq # sched: [1:1.00]
%div = fdiv fast float 1234.0, %x
ret float %div
}
@@ -97,52 +97,52 @@ define float @f32_one_step_2(float %x) #1 {
;
; BTVER2-LABEL: f32_one_step_2:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: f32_one_step_2:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: f32_one_step_2:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
-; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: f32_one_step_2:
; HASWELL-NO-FMA: # BB#0:
-; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1
-; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0
-; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; HASWELL-NO-FMA-NEXT: retq
+; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
;
; AVX512-LABEL: f32_one_step_2:
; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
-; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; AVX512-NEXT: retq # sched: [1:1.00]
%div = fdiv fast float 3456.0, %x
ret float %div
}
@@ -184,57 +184,57 @@ define float @f32_one_step_2_divs(float %x) #1 {
;
; BTVER2-LABEL: f32_one_step_2_divs:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [7:1.00]
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: f32_one_step_2_divs:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00]
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: f32_one_step_2_divs:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
-; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
-; HASWELL-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
+; HASWELL-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: f32_one_step_2_divs:
; HASWELL-NO-FMA: # BB#0:
-; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1
-; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0
-; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
-; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; HASWELL-NO-FMA-NEXT: retq
+; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
+; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
;
; AVX512-LABEL: f32_one_step_2_divs:
; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0
-; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
-; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: retq
+; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
+; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; AVX512-NEXT: retq # sched: [1:1.00]
%div = fdiv fast float 3456.0, %x
%div2 = fdiv fast float %div, %x
ret float %div2
@@ -288,72 +288,72 @@ define float @f32_two_step_2(float %x) #2 {
;
; BTVER2-LABEL: f32_two_step_2:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2
-; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2
-; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2
-; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
+; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
+; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [2:1.00]
+; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: f32_two_step_2:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2
-; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2
-; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2
-; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
+; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50]
+; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
+; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
+; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: f32_two_step_2:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
-; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; HASWELL-NEXT: vmovaps %xmm1, %xmm3
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3
; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3
; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0
; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0
-; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: f32_two_step_2:
; HASWELL-NO-FMA: # BB#0:
-; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1
-; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm2
-; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; HASWELL-NO-FMA-NEXT: vsubss %xmm2, %xmm3, %xmm2
-; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2
-; HASWELL-NO-FMA-NEXT: vaddss %xmm2, %xmm1, %xmm1
-; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0
-; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm3, %xmm0
-; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0
-; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0
-; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; HASWELL-NO-FMA-NEXT: retq
+; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
;
; AVX512-LABEL: f32_two_step_2:
; AVX512: # BB#0:
; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1
-; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX512-NEXT: vmovaps %xmm1, %xmm3
+; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50]
+; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3
; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3
; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0
; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0
-; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; AVX512-NEXT: retq # sched: [1:1.00]
%div = fdiv fast float 6789.0, %x
ret float %div
}
@@ -392,62 +392,62 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
;
; BTVER2-LABEL: v4f32_one_step2:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; BTVER2-NEXT: vrcpps %xmm0, %xmm1
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
+; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v4f32_one_step2:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v4f32_one_step2:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpps %xmm0, %xmm1
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
+; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v4f32_one_step2:
; HASWELL-NO-FMA: # BB#0:
-; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
-; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
-; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0
-; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
-; HASWELL-NO-FMA-NEXT: retq
+; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
;
; KNL-LABEL: v4f32_one_step2:
; KNL: # BB#0:
-; KNL-NEXT: vrcpps %xmm0, %xmm1
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
+; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
-; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
-; KNL-NEXT: retq
+; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; KNL-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: v4f32_one_step2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
-; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
-; SKX-NEXT: retq
+; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; SKX-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
ret <4 x float> %div
}
@@ -489,68 +489,68 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
;
; BTVER2-LABEL: v4f32_one_step_2_divs:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; BTVER2-NEXT: vrcpps %xmm0, %xmm1
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
+; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [7:1.00]
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v4f32_one_step_2_divs:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00]
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v4f32_one_step_2_divs:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpps %xmm0, %xmm1
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
+; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
-; HASWELL-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
+; HASWELL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v4f32_one_step_2_divs:
; HASWELL-NO-FMA: # BB#0:
-; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
-; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
-; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0
-; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
-; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; HASWELL-NO-FMA-NEXT: retq
+; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
+; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
;
; KNL-LABEL: v4f32_one_step_2_divs:
; KNL: # BB#0:
-; KNL-NEXT: vrcpps %xmm0, %xmm1
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
+; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
-; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
-; KNL-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; KNL-NEXT: retq
+; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
+; KNL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; KNL-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: v4f32_one_step_2_divs:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0
-; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
-; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; SKX-NEXT: retq
+; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
+; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; SKX-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
%div2 = fdiv fast <4 x float> %div, %x
ret <4 x float> %div2
@@ -604,84 +604,84 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
;
; BTVER2-LABEL: v4f32_two_step2:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; BTVER2-NEXT: vrcpps %xmm0, %xmm1
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2
-; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2
-; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2
-; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
+; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
+; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [2:1.00]
+; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v4f32_two_step2:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2
-; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2
-; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2
-; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
+; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
+; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v4f32_two_step2:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpps %xmm0, %xmm1
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
-; HASWELL-NEXT: vmovaps %xmm1, %xmm3
+; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v4f32_two_step2:
; HASWELL-NO-FMA: # BB#0:
-; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
-; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3
-; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2
-; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2
-; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1
-; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm3, %xmm0
-; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
-; HASWELL-NO-FMA-NEXT: retq
+; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3 # sched: [4:0.50]
+; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
;
; KNL-LABEL: v4f32_two_step2:
; KNL: # BB#0:
-; KNL-NEXT: vrcpps %xmm0, %xmm1
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
-; KNL-NEXT: vmovaps %xmm1, %xmm3
+; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
+; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
-; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
-; KNL-NEXT: retq
+; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; KNL-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: v4f32_two_step2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %xmm0, %xmm1
-; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
-; SKX-NEXT: vmovaps %xmm1, %xmm3
+; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50]
+; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3
; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3
; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0
; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0
-; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
-; SKX-NEXT: retq
+; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
+; SKX-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
ret <4 x float> %div
}
@@ -728,62 +728,62 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
;
; BTVER2-LABEL: v8f32_one_step2:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:1.00]
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
+; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v8f32_one_step2:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm1
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v8f32_one_step2:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm1
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
+; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_one_step2:
; HASWELL-NO-FMA: # BB#0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
-; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; HASWELL-NO-FMA-NEXT: retq
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
+; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
;
; KNL-LABEL: v8f32_one_step2:
; KNL: # BB#0:
-; KNL-NEXT: vrcpps %ymm0, %ymm1
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
+; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
-; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: retq
+; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
+; KNL-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: v8f32_one_step2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
-; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; SKX-NEXT: retq
+; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
+; SKX-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
ret <8 x float> %div
}
@@ -834,68 +834,68 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
;
; BTVER2-LABEL: v8f32_one_step_2_divs:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:1.00]
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
+; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [7:1.00]
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v8f32_one_step_2_divs:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm1
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v8f32_one_step_2_divs:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm1
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
+; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
-; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
+; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_one_step_2_divs:
; HASWELL-NO-FMA: # BB#0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
-; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; HASWELL-NO-FMA-NEXT: retq
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
;
; KNL-LABEL: v8f32_one_step_2_divs:
; KNL: # BB#0:
-; KNL-NEXT: vrcpps %ymm0, %ymm1
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
+; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
-; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
-; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; KNL-NEXT: retq
+; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
+; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; KNL-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: v8f32_one_step_2_divs:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0
-; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
-; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; SKX-NEXT: retq
+; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00]
+; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; SKX-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
%div2 = fdiv fast <8 x float> %div, %x
ret <8 x float> %div2
@@ -963,84 +963,84 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
;
; BTVER2-LABEL: v8f32_two_step2:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2
-; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2
-; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [5:1.00]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:1.00]
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:1.00]
+; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [2:1.00]
+; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
+; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:1.00]
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v8f32_two_step2:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm1
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2
-; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00]
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50]
+; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
+; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v8f32_two_step2:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm1
-; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
-; HASWELL-NEXT: vmovaps %ymm1, %ymm3
+; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_two_step2:
; HASWELL-NO-FMA: # BB#0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm3
-; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2
-; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
-; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
-; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; HASWELL-NO-FMA-NEXT: retq
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm3 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
+; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
+; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
;
; KNL-LABEL: v8f32_two_step2:
; KNL: # BB#0:
-; KNL-NEXT: vrcpps %ymm0, %ymm1
-; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
-; KNL-NEXT: vmovaps %ymm1, %ymm3
+; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
+; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
-; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: retq
+; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
+; KNL-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: v8f32_two_step2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm1
-; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2
-; SKX-NEXT: vmovaps %ymm1, %ymm3
+; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00]
+; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3
; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3
; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0
; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0
-; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; SKX-NEXT: retq
+; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
+; SKX-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
ret <8 x float> %div
}
@@ -1064,33 +1064,33 @@ define <8 x float> @v8f32_no_step(<8 x float> %x) #3 {
;
; BTVER2-LABEL: v8f32_no_step:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vrcpps %ymm0, %ymm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v8f32_no_step:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v8f32_no_step:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_no_step:
; HASWELL-NO-FMA: # BB#0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0
-; HASWELL-NO-FMA-NEXT: retq
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
+; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
;
; KNL-LABEL: v8f32_no_step:
; KNL: # BB#0:
-; KNL-NEXT: vrcpps %ymm0, %ymm0
-; KNL-NEXT: retq
+; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
+; KNL-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: v8f32_no_step:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm0
-; SKX-NEXT: retq
+; SKX-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
@@ -1118,39 +1118,39 @@ define <8 x float> @v8f32_no_step2(<8 x float> %x) #3 {
;
; BTVER2-LABEL: v8f32_no_step2:
; BTVER2: # BB#0:
-; BTVER2-NEXT: vrcpps %ymm0, %ymm0
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; BTVER2-NEXT: retq
+; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:1.00]
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:1.00]
+; BTVER2-NEXT: retq # sched: [4:1.00]
;
; SANDY-LABEL: v8f32_no_step2:
; SANDY: # BB#0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm0
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; SANDY-NEXT: retq
+; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00]
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
+; SANDY-NEXT: retq # sched: [5:1.00]
;
; HASWELL-LABEL: v8f32_no_step2:
; HASWELL: # BB#0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm0
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; HASWELL-NEXT: retq
+; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
+; HASWELL-NEXT: retq # sched: [1:1.00]
;
; HASWELL-NO-FMA-LABEL: v8f32_no_step2:
; HASWELL-NO-FMA: # BB#0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; HASWELL-NO-FMA-NEXT: retq
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
+; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00]
;
; KNL-LABEL: v8f32_no_step2:
; KNL: # BB#0:
-; KNL-NEXT: vrcpps %ymm0, %ymm0
-; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; KNL-NEXT: retq
+; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
+; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
+; KNL-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: v8f32_no_step2:
; SKX: # BB#0:
; SKX-NEXT: vrcp14ps %ymm0, %ymm0
-; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; SKX-NEXT: retq
+; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00]
+; SKX-NEXT: retq # sched: [1:1.00]
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
ret <8 x float> %div
}
OpenPOWER on IntegriCloud