summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/CodeGen/AsmPrinter.h3
-rw-r--r--llvm/include/llvm/CodeGen/TargetSubtargetInfo.h7
-rw-r--r--llvm/include/llvm/MC/MCObjectStreamer.h3
-rw-r--r--llvm/include/llvm/MC/MCParser/MCAsmParser.h6
-rw-r--r--llvm/include/llvm/MC/MCStreamer.h4
-rw-r--r--llvm/include/llvm/MC/MCSubtargetInfo.h5
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp57
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineCombiner.cpp11
-rw-r--r--llvm/lib/CodeGen/TargetSubtargetInfo.cpp70
-rw-r--r--llvm/lib/MC/MCAsmStreamer.cpp30
-rw-r--r--llvm/lib/MC/MCObjectStreamer.cpp2
-rw-r--r--llvm/lib/MC/MCStreamer.cpp3
-rw-r--r--llvm/lib/Object/RecordStreamer.cpp2
-rw-r--r--llvm/lib/Object/RecordStreamer.h3
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp4
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h3
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp2
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h3
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp4
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp13
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h6
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp3
-rw-r--r--llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp1
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h4
-rw-r--r--llvm/lib/Target/X86/X86MCInstLower.cpp26
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.h3
-rw-r--r--llvm/test/CodeGen/X86/3dnow-schedule.ll394
-rw-r--r--llvm/test/CodeGen/X86/adx-schedule.ll114
-rw-r--r--llvm/test/CodeGen/X86/aes-schedule.ll751
-rw-r--r--llvm/test/CodeGen/X86/avx-schedule.ll6120
-rw-r--r--llvm/test/CodeGen/X86/avx2-schedule.ll7111
-rwxr-xr-xllvm/test/CodeGen/X86/avx512-schedule.ll8762
-rwxr-xr-xllvm/test/CodeGen/X86/avx512-shuffle-schedule.ll15629
-rw-r--r--llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll80
-rw-r--r--llvm/test/CodeGen/X86/bmi-schedule.ll763
-rw-r--r--llvm/test/CodeGen/X86/bmi2-schedule.ll811
-rw-r--r--llvm/test/CodeGen/X86/clflushopt-schedule.ll36
-rw-r--r--llvm/test/CodeGen/X86/clwb-schedule.ll18
-rw-r--r--llvm/test/CodeGen/X86/clzero-schedule.ll20
-rw-r--r--llvm/test/CodeGen/X86/cmov-schedule.ll2203
-rw-r--r--llvm/test/CodeGen/X86/f16c-schedule.ll255
-rw-r--r--llvm/test/CodeGen/X86/fma-schedule.ll3317
-rw-r--r--llvm/test/CodeGen/X86/fma4-schedule.ll1058
-rw-r--r--llvm/test/CodeGen/X86/fsgsbase-schedule.ll411
-rw-r--r--llvm/test/CodeGen/X86/lea32-schedule.ll898
-rw-r--r--llvm/test/CodeGen/X86/lea64-schedule.ll728
-rw-r--r--llvm/test/CodeGen/X86/lwp-schedule.ll299
-rw-r--r--llvm/test/CodeGen/X86/lzcnt-schedule.ll187
-rw-r--r--llvm/test/CodeGen/X86/mmx-schedule.ll7559
-rw-r--r--llvm/test/CodeGen/X86/movbe-schedule.ll190
-rw-r--r--llvm/test/CodeGen/X86/mul-constant-i32.ll1794
-rw-r--r--llvm/test/CodeGen/X86/mul-constant-i64.ll1863
-rw-r--r--llvm/test/CodeGen/X86/mwaitx-schedule.ll65
-rw-r--r--llvm/test/CodeGen/X86/popcnt-schedule.ll235
-rw-r--r--llvm/test/CodeGen/X86/rdpid-schedule.ll21
-rw-r--r--llvm/test/CodeGen/X86/rdrand-schedule.ll148
-rw-r--r--llvm/test/CodeGen/X86/rdseed-schedule.ll116
-rw-r--r--llvm/test/CodeGen/X86/recip-fastmath.ll966
-rw-r--r--llvm/test/CodeGen/X86/recip-fastmath2.ll1863
-rw-r--r--llvm/test/CodeGen/X86/rtm-schedule.ll62
-rw-r--r--llvm/test/CodeGen/X86/schedule-x86-64-shld.ll471
-rw-r--r--llvm/test/CodeGen/X86/schedule-x86_32.ll2601
-rw-r--r--llvm/test/CodeGen/X86/schedule-x86_64.ll18893
-rw-r--r--llvm/test/CodeGen/X86/sha-schedule.ll242
-rw-r--r--llvm/test/CodeGen/X86/sse-schedule.ll6975
-rw-r--r--llvm/test/CodeGen/X86/sse2-schedule.ll16972
-rw-r--r--llvm/test/CodeGen/X86/sse3-schedule.ll1549
-rw-r--r--llvm/test/CodeGen/X86/sse41-schedule.ll6248
-rw-r--r--llvm/test/CodeGen/X86/sse42-schedule.ll1631
-rw-r--r--llvm/test/CodeGen/X86/sse4a-schedule.ll156
-rw-r--r--llvm/test/CodeGen/X86/ssse3-schedule.ll2049
-rw-r--r--llvm/test/CodeGen/X86/tbm-schedule.ll773
-rw-r--r--llvm/test/CodeGen/X86/x87-schedule.ll6420
-rw-r--r--llvm/test/CodeGen/X86/xop-schedule.ll1818
-rw-r--r--llvm/tools/llvm-exegesis/llvm-exegesis.cpp3
-rw-r--r--llvm/tools/llvm-mca/CodeRegionGenerator.cpp3
79 files changed, 2485 insertions, 129452 deletions
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 4297450546a..fb12bb26b3e 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -121,9 +121,6 @@ public:
using GOTEquivUsePair = std::pair<const GlobalVariable *, unsigned>;
MapVector<const MCSymbol *, GOTEquivUsePair> GlobalGOTEquivs;
- /// Enable print [latency:throughput] in output.
- bool EnablePrintSchedInfo = false;
-
private:
MCSymbol *CurrentFnBegin = nullptr;
MCSymbol *CurrentFnEnd = nullptr;
diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
index b206cf4e895..bf0e9b20833 100644
--- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -189,9 +189,6 @@ public:
/// TargetLowering preference). It does not yet disable the postRA scheduler.
virtual bool enableMachineScheduler() const;
- /// Support printing of [latency:throughput] comment in output .S file.
- virtual bool supportPrintSchedInfo() const { return false; }
-
/// True if the machine scheduler should disable the TLI preference
/// for preRA scheduling with the source level scheduler.
virtual bool enableMachineSchedDefaultSched() const { return true; }
@@ -285,10 +282,6 @@ public:
/// possible.
virtual bool enableSubRegLiveness() const { return false; }
- /// Returns string representation of scheduler comment
- std::string getSchedInfoStr(const MachineInstr &MI) const;
- std::string getSchedInfoStr(MCInst const &MCI) const override;
-
/// This is called after a .mir file was loaded.
virtual void mirFileLoaded(MachineFunction &MF) const;
};
diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h
index 07d0ce68336..8affca49490 100644
--- a/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -115,8 +115,7 @@ public:
void EmitSLEB128Value(const MCExpr *Value) override;
void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override;
void ChangeSection(MCSection *Section, const MCExpr *Subsection) override;
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool = false) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
/// Emit an instruction to a special fragment, because this instruction
/// can change its size during relaxation.
diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index d65347ab980..da5653ee71d 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -129,9 +129,6 @@ protected: // Can only create subclasses.
/// Flag tracking whether any errors have been encountered.
bool HadError = false;
- /// Enable print [latency:throughput] in output file.
- bool EnablePrintSchedInfo = false;
-
bool ShowParsedOperands = false;
public:
@@ -165,9 +162,6 @@ public:
bool getShowParsedOperands() const { return ShowParsedOperands; }
void setShowParsedOperands(bool Value) { ShowParsedOperands = Value; }
- void setEnablePrintSchedInfo(bool Value) { EnablePrintSchedInfo = Value; }
- bool shouldPrintSchedInfo() const { return EnablePrintSchedInfo; }
-
/// Run the parser on the input source buffer.
virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false) = 0;
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index 57514bebe12..dc2386fca85 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -952,9 +952,7 @@ public:
virtual void EmitAddrsigSym(const MCSymbol *Sym) {}
/// Emit the given \p Instruction into the current section.
- /// PrintSchedInfo == true then schedul comment should be added to output
- virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool PrintSchedInfo = false);
+ virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);
/// Set the bundle alignment mode from now on in the section.
/// The argument is the power of 2 to which the alignment is set. The
diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h
index 2ad72c3c325..c7472a28c77 100644
--- a/llvm/include/llvm/MC/MCSubtargetInfo.h
+++ b/llvm/include/llvm/MC/MCSubtargetInfo.h
@@ -180,11 +180,6 @@ public:
auto Found = std::lower_bound(ProcDesc.begin(), ProcDesc.end(), CPU);
return Found != ProcDesc.end() && StringRef(Found->Key) == CPU;
}
-
- /// Returns string representation of scheduler comment
- virtual std::string getSchedInfoStr(MCInst const &MCI) const {
- return {};
- }
};
} // end namespace llvm
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 4c9fe59af42..b8d4466eaba 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -59,7 +59,6 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constant.h"
@@ -142,10 +141,6 @@ static const char *const CodeViewLineTablesGroupDescription =
STATISTIC(EmittedInsts, "Number of machine instrs printed");
-static cl::opt<bool>
- PrintSchedule("print-schedule", cl::Hidden, cl::init(false),
- cl::desc("Print 'sched: [latency:throughput]' in .s output"));
-
char AsmPrinter::ID = 0;
using gcp_map_type = DenseMap<GCStrategy *, std::unique_ptr<GCMetadataPrinter>>;
@@ -746,10 +741,7 @@ void AsmPrinter::EmitFunctionEntryLabel() {
}
/// emitComments - Pretty-print comments for instructions.
-/// It returns true iff the sched comment was emitted.
-/// Otherwise it returns false.
-static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
- AsmPrinter *AP) {
+static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
const MachineFunction *MF = MI.getMF();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
@@ -757,7 +749,6 @@ static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
int FI;
const MachineFrameInfo &MFI = MF->getFrameInfo();
- bool Commented = false;
auto getSize =
[&MFI](const SmallVectorImpl<const MachineMemOperand *> &Accesses) {
@@ -777,43 +768,24 @@ static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
- CommentOS << MMO->getSize() << "-byte Reload";
- Commented = true;
+ CommentOS << MMO->getSize() << "-byte Reload\n";
}
} else if (TII->hasLoadFromStackSlot(MI, Accesses)) {
- if (auto Size = getSize(Accesses)) {
- CommentOS << Size << "-byte Folded Reload";
- Commented = true;
- }
+ if (auto Size = getSize(Accesses))
+ CommentOS << Size << "-byte Folded Reload\n";
} else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
- CommentOS << MMO->getSize() << "-byte Spill";
- Commented = true;
+ CommentOS << MMO->getSize() << "-byte Spill\n";
}
} else if (TII->hasStoreToStackSlot(MI, Accesses)) {
- if (auto Size = getSize(Accesses)) {
- CommentOS << Size << "-byte Folded Spill";
- Commented = true;
- }
+ if (auto Size = getSize(Accesses))
+ CommentOS << Size << "-byte Folded Spill\n";
}
// Check for spill-induced copies
- if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) {
- Commented = true;
- CommentOS << " Reload Reuse";
- }
-
- if (Commented) {
- if (AP->EnablePrintSchedInfo) {
- // If any comment was added above and we need sched info comment then add
- // this new comment just after the above comment w/o "\n" between them.
- CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n";
- return true;
- }
- CommentOS << "\n";
- }
- return false;
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
+ CommentOS << " Reload Reuse\n";
}
/// emitImplicitDef - This method emits the specified machine instruction
@@ -1101,10 +1073,8 @@ void AsmPrinter::EmitFunctionBody() {
}
}
- if (isVerbose() && emitComments(MI, OutStreamer->GetCommentOS(), this)) {
- MachineInstr *MIP = const_cast<MachineInstr *>(&MI);
- MIP->setAsmPrinterFlag(MachineInstr::NoSchedComment);
- }
+ if (isVerbose())
+ emitComments(MI, OutStreamer->GetCommentOS());
switch (MI.getOpcode()) {
case TargetOpcode::CFI_INSTRUCTION:
@@ -1636,11 +1606,6 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
}
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
-
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- EnablePrintSchedInfo = PrintSchedule.getNumOccurrences()
- ? PrintSchedule
- : STI.supportPrintSchedInfo();
}
namespace {
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 6d5b4be539f..9e6d35c5e9a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -18,7 +18,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
@@ -154,7 +153,6 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
" we don't have an asm parser for this target\n");
Parser->setAssemblerDialect(Dialect);
Parser->setTargetParser(*TAP.get());
- Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo);
// Enable lexing Masm binary and hex integer literals in intel inline
// assembly.
if (Dialect == InlineAsm::AD_Intel)
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index 1b50b51dc5e..f35f1130817 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -558,16 +558,13 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
continue;
LLVM_DEBUG(if (dump_intrs) {
- dbgs() << "\tFor the Pattern (" << (int)P << ") these instructions could be removed\n";
- for (auto const *InstrPtr : DelInstrs) {
- dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": ";
+ dbgs() << "\tFor the Pattern (" << (int)P
+ << ") these instructions could be removed\n";
+ for (auto const *InstrPtr : DelInstrs)
InstrPtr->print(dbgs(), false, false, false, TII);
- }
dbgs() << "\tThese instructions could replace the removed ones\n";
- for (auto const *InstrPtr : InsInstrs) {
- dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": ";
+ for (auto const *InstrPtr : InsInstrs)
InstrPtr->print(dbgs(), false, false, false, TII);
- }
});
bool SubstituteAlways = false;
diff --git a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
index e34f9a1579d..7b29b68597c 100644
--- a/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -11,14 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include <string>
using namespace llvm;
@@ -66,64 +58,4 @@ bool TargetSubtargetInfo::useAA() const {
return false;
}
-static std::string createSchedInfoStr(unsigned Latency, double RThroughput) {
- static const char *SchedPrefix = " sched: [";
- std::string Comment;
- raw_string_ostream CS(Comment);
- if (RThroughput != 0.0)
- CS << SchedPrefix << Latency << format(":%2.2f", RThroughput)
- << "]";
- else
- CS << SchedPrefix << Latency << ":?]";
- CS.flush();
- return Comment;
-}
-
-/// Returns string representation of scheduler comment
-std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const {
- if (MI.isPseudo() || MI.isTerminator())
- return std::string();
- // We don't cache TSchedModel because it depends on TargetInstrInfo
- // that could be changed during the compilation
- TargetSchedModel TSchedModel;
- TSchedModel.init(this);
- unsigned Latency = TSchedModel.computeInstrLatency(&MI);
-
- // Add extra latency due to forwarding delays.
- const MCSchedClassDesc &SCDesc = *TSchedModel.resolveSchedClass(&MI);
- Latency +=
- MCSchedModel::getForwardingDelayCycles(getReadAdvanceEntries(SCDesc));
-
- double RThroughput = TSchedModel.computeReciprocalThroughput(&MI);
- return createSchedInfoStr(Latency, RThroughput);
-}
-
-/// Returns string representation of scheduler comment
-std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const {
- // We don't cache TSchedModel because it depends on TargetInstrInfo
- // that could be changed during the compilation
- TargetSchedModel TSchedModel;
- TSchedModel.init(this);
- unsigned Latency;
- if (TSchedModel.hasInstrSchedModel()) {
- Latency = TSchedModel.computeInstrLatency(MCI);
- // Add extra latency due to forwarding delays.
- const MCSchedModel &SM = *TSchedModel.getMCSchedModel();
- unsigned SClassID = getInstrInfo()->get(MCI.getOpcode()).getSchedClass();
- while (SM.getSchedClassDesc(SClassID)->isVariant())
- SClassID = resolveVariantSchedClass(SClassID, &MCI, SM.ProcID);
- const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SClassID);
- Latency +=
- MCSchedModel::getForwardingDelayCycles(getReadAdvanceEntries(SCDesc));
- } else if (TSchedModel.hasInstrItineraries()) {
- auto *ItinData = TSchedModel.getInstrItineraries();
- Latency = ItinData->getStageLatency(
- getInstrInfo()->get(MCI.getOpcode()).getSchedClass());
- } else
- return std::string();
- double RThroughput = TSchedModel.computeReciprocalThroughput(MCI);
- return createSchedInfoStr(Latency, RThroughput);
-}
-
-void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const {
-}
+void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const { }
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index b18c82309ac..f5e40f5c604 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -107,10 +107,7 @@ public:
void AddComment(const Twine &T, bool EOL = true) override;
/// Add a comment showing the encoding of an instruction.
- /// If PrintSchedInfo is true, then the comment sched:[x:y] will be added to
- /// the output if supported by the target.
- void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &,
- bool PrintSchedInfo);
+ void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &);
/// Return a raw_ostream that comments can be written to.
/// Unlike AddComment, you are required to terminate comments with \n if you
@@ -311,8 +308,7 @@ public:
void emitCGProfileEntry(const MCSymbolRefExpr *From,
const MCSymbolRefExpr *To, uint64_t Count) override;
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool PrintSchedInfo) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
void EmitBundleAlignMode(unsigned AlignPow2) override;
void EmitBundleLock(bool AlignToEnd) override;
@@ -1739,8 +1735,7 @@ void MCAsmStreamer::emitCGProfileEntry(const MCSymbolRefExpr *From,
}
void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
- const MCSubtargetInfo &STI,
- bool PrintSchedInfo) {
+ const MCSubtargetInfo &STI) {
raw_ostream &OS = GetCommentOS();
SmallString<256> Code;
SmallVector<MCFixup, 4> Fixups;
@@ -1819,11 +1814,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
}
}
}
- OS << "]";
- // If we are not going to add fixup or schedule comments after this point
- // then we have to end the current comment line with "\n".
- if (Fixups.size() || !PrintSchedInfo)
- OS << "\n";
+ OS << "]\n";
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
MCFixup &F = Fixups[i];
@@ -1835,18 +1826,15 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
}
void MCAsmStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI,
- bool PrintSchedInfo) {
+ const MCSubtargetInfo &STI) {
assert(getCurrentSectionOnly() &&
"Cannot emit contents before setting section!");
// Show the encoding in a comment if we have a code emitter.
- AddEncodingComment(Inst, STI, PrintSchedInfo);
+ AddEncodingComment(Inst, STI);
// Show the MCInst if enabled.
if (ShowInst) {
- if (PrintSchedInfo)
- GetCommentOS() << "\n";
Inst.dump_pretty(GetCommentOS(), InstPrinter.get(), "\n ");
GetCommentOS() << "\n";
}
@@ -1856,12 +1844,6 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst,
else
InstPrinter->printInst(&Inst, OS, "", STI);
- if (PrintSchedInfo) {
- std::string SI = STI.getSchedInfoStr(Inst);
- if (!SI.empty())
- GetCommentOS() << SI;
- }
-
StringRef Comments = CommentToEmit;
if (Comments.size() && Comments.back() != '\n')
GetCommentOS() << "\n";
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index 6eada630b8c..1587d849866 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -314,7 +314,7 @@ bool MCObjectStreamer::mayHaveInstructions(MCSection &Sec) const {
}
void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI, bool) {
+ const MCSubtargetInfo &STI) {
getAssembler().getBackend().handleCodePaddingInstructionBegin(Inst);
EmitInstructionImpl(Inst, STI);
getAssembler().getBackend().handleCodePaddingInstructionEnd(Inst);
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index c5900a89c1c..554cce1214a 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -952,8 +952,7 @@ void MCStreamer::visitUsedExpr(const MCExpr &Expr) {
}
}
-void MCStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool) {
+void MCStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &) {
// Scan for values.
for (unsigned i = Inst.getNumOperands(); i--;)
if (Inst.getOperand(i).isExpr())
diff --git a/llvm/lib/Object/RecordStreamer.cpp b/llvm/lib/Object/RecordStreamer.cpp
index 7b2042cc7fe..f39a6c28ed5 100644
--- a/llvm/lib/Object/RecordStreamer.cpp
+++ b/llvm/lib/Object/RecordStreamer.cpp
@@ -82,7 +82,7 @@ RecordStreamer::const_iterator RecordStreamer::begin() {
RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); }
void RecordStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI, bool) {
+ const MCSubtargetInfo &STI) {
MCStreamer::EmitInstruction(Inst, STI);
}
diff --git a/llvm/lib/Object/RecordStreamer.h b/llvm/lib/Object/RecordStreamer.h
index 5ac9cd6a4f5..c8b75bcc6d1 100644
--- a/llvm/lib/Object/RecordStreamer.h
+++ b/llvm/lib/Object/RecordStreamer.h
@@ -46,8 +46,7 @@ private:
public:
RecordStreamer(MCContext &Context, const Module &M);
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
void EmitLabel(MCSymbol *Symbol, SMLoc Loc = SMLoc()) override;
void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index c5ff86a4992..c33f7e957b5 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -102,8 +102,8 @@ public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool) override {
+ void EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) override {
EmitA64MappingSymbol();
MCELFStreamer::EmitInstruction(Inst, STI);
}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 9069a87ae64..f51fbdcd84d 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -484,8 +484,8 @@ public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool) override {
+ void EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) override {
if (IsThumb)
EmitThumbMappingSymbol();
else
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index fa853f5de24..f2432883af6 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -59,7 +59,7 @@ HexagonMCELFStreamer::HexagonMCELFStreamer(
MCII(createHexagonMCInstrInfo()) {}
void HexagonMCELFStreamer::EmitInstruction(const MCInst &MCB,
- const MCSubtargetInfo &STI, bool) {
+ const MCSubtargetInfo &STI) {
assert(MCB.getOpcode() == Hexagon::BUNDLE);
assert(HexagonMCInstrInfo::bundleSize(MCB) <= HEXAGON_PACKET_SIZE);
assert(HexagonMCInstrInfo::bundleSize(MCB) > 0);
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
index 8a130ab6d17..6248bd25d43 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
@@ -30,8 +30,7 @@ public:
std::unique_ptr<MCCodeEmitter> Emitter,
MCAssembler *Assembler);
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
void EmitSymbol(const MCInst &Inst);
void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment,
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
index 6317753a4cd..1b83e9445fb 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
@@ -34,7 +34,7 @@ MipsELFStreamer::MipsELFStreamer(MCContext &Context,
}
void MipsELFStreamer::EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI, bool) {
+ const MCSubtargetInfo &STI) {
MCELFStreamer::EmitInstruction(Inst, STI);
MCContext &Context = getContext();
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
index b760f1e7dc2..2febfbc69b6 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
@@ -41,8 +41,7 @@ public:
/// \p Inst is actually emitted. For example, we can inspect the operands and
/// gather sufficient information that allows us to reason about the register
/// usage for the translation unit.
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool = false) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
/// Overriding this function allows us to record all labels that should be
/// marked as microMIPS. Based on this data marking is done in
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index 6019e99b801..c050db8a17f 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -143,8 +143,8 @@ private:
public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to mask dangerous instructions.
- void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
- bool) override {
+ void EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) override {
// Sandbox indirect jumps.
if (isIndirectJump(Inst)) {
if (PendingCall)
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index c3f4571dfdd..f65f6ee5596 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -194,8 +194,7 @@ public:
// X86AsmInstrumentation implementation:
void InstrumentAndEmitInstruction(const MCInst &Inst, OperandVector &Operands,
MCContext &Ctx, const MCInstrInfo &MII,
- MCStreamer &Out,
- /* unused */ bool) override {
+ MCStreamer &Out) override {
InstrumentMOVS(Inst, Operands, Ctx, MII, Out);
if (RepPrefix)
EmitInstruction(Out, MCInstBuilder(X86::REP_PREFIX));
@@ -1043,13 +1042,13 @@ X86AsmInstrumentation::~X86AsmInstrumentation() = default;
void X86AsmInstrumentation::InstrumentAndEmitInstruction(
const MCInst &Inst, OperandVector &Operands, MCContext &Ctx,
- const MCInstrInfo &MII, MCStreamer &Out, bool PrintSchedInfoEnabled) {
- EmitInstruction(Out, Inst, PrintSchedInfoEnabled);
+ const MCInstrInfo &MII, MCStreamer &Out) {
+ EmitInstruction(Out, Inst);
}
-void X86AsmInstrumentation::EmitInstruction(MCStreamer &Out, const MCInst &Inst,
- bool PrintSchedInfoEnabled) {
- Out.EmitInstruction(Inst, *STI, PrintSchedInfoEnabled);
+void X86AsmInstrumentation::EmitInstruction(MCStreamer &Out,
+ const MCInst &Inst) {
+ Out.EmitInstruction(Inst, *STI);
}
unsigned X86AsmInstrumentation::GetFrameRegGeneric(const MCContext &Ctx,
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
index ef77089af80..58ecd7d9675 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
@@ -41,8 +41,7 @@ public:
virtual void InstrumentAndEmitInstruction(
const MCInst &Inst,
SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>> &Operands,
- MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out,
- bool PrintSchedInfoEnabled);
+ MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
protected:
friend X86AsmInstrumentation *
@@ -54,8 +53,7 @@ protected:
unsigned GetFrameRegGeneric(const MCContext &Ctx, MCStreamer &Out);
- void EmitInstruction(MCStreamer &Out, const MCInst &Inst,
- bool PrintSchedInfoEnabled = false);
+ void EmitInstruction(MCStreamer &Out, const MCInst &Inst);
const MCSubtargetInfo *&STI;
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 4c581083d26..44173d9b2b3 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2864,8 +2864,7 @@ static const char *getSubtargetFeatureName(uint64_t Val);
void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
MCStreamer &Out) {
Instrumentation->InstrumentAndEmitInstruction(
- Inst, Operands, getContext(), MII, Out,
- getParser().shouldPrintSchedInfo());
+ Inst, Operands, getContext(), MII, Out);
}
bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
diff --git a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 06a87ec3408..a6d07c1c5be 100644
--- a/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -1303,6 +1303,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
OS << ']';
--i; // For loop increments element #.
}
+ OS << '\n';
// We successfully added a comment to this instruction.
return true;
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 7b2141ce546..62dd685b360 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -59,9 +59,7 @@ namespace X86 {
IP_HAS_REPEAT_NE = 4,
IP_HAS_REPEAT = 8,
IP_HAS_LOCK = 16,
- NO_SCHED_INFO = 32, // Don't add sched comment to the current instr because
- // it was already added
- IP_HAS_NOTRACK = 64
+ IP_HAS_NOTRACK = 32
};
} // end namespace X86;
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index a2a1c7c2390..cc2a1a43b67 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -100,9 +100,7 @@ void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding(
}
void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) {
- OutStreamer->EmitInstruction(Inst, getSubtargetInfo(),
- EnablePrintSchedInfo &&
- !(Inst.getFlags() & X86::NO_SCHED_INFO));
+ OutStreamer->EmitInstruction(Inst, getSubtargetInfo());
SMShadowTracker.count(Inst, getSubtargetInfo(), CodeEmitter.get());
}
@@ -1860,8 +1858,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 64> Mask;
DecodePSHUFBMask(C, Width, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
- !EnablePrintSchedInfo);
+ OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
}
break;
}
@@ -1933,8 +1930,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPERMILPMask(C, ElSize, Width, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask),
- !EnablePrintSchedInfo);
+ OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
}
break;
}
@@ -1965,8 +1961,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Width, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
- !EnablePrintSchedInfo);
+ OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
}
break;
}
@@ -1983,8 +1978,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallVector<int, 16> Mask;
DecodeVPPERMMask(C, Width, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask),
- !EnablePrintSchedInfo);
+ OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
}
break;
}
@@ -2001,7 +1995,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
CS << X86ATTInstPrinter::getRegisterName(DstOp.getReg()) << " = ";
if (auto *CF = dyn_cast<ConstantFP>(C)) {
CS << "0x" << CF->getValueAPF().bitcastToAPInt().toString(16, false);
- OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
+ OutStreamer->AddComment(CS.str());
}
}
break;
@@ -2098,7 +2092,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
CS << "]";
- OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
+ OutStreamer->AddComment(CS.str());
} else if (auto *CV = dyn_cast<ConstantVector>(C)) {
CS << "<";
for (int l = 0; l != NumLanes; ++l) {
@@ -2110,7 +2104,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
CS << ">";
- OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
+ OutStreamer->AddComment(CS.str());
}
}
break;
@@ -2197,14 +2191,12 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
printConstant(C, CS);
}
CS << "]";
- OutStreamer->AddComment(CS.str(), !EnablePrintSchedInfo);
+ OutStreamer->AddComment(CS.str());
}
}
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
- if (MI->getAsmPrinterFlag(MachineInstr::NoSchedComment))
- TmpInst.setFlags(TmpInst.getFlags() | X86::NO_SCHED_INFO);
// Stackmap shadows cannot include branch targets, so we can count the bytes
// in a call towards the shadow, but must ensure that the no thread returns
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 5d99577ea19..6e2e4708005 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -833,9 +833,6 @@ public:
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
- // TODO: Update the regression tests and return true.
- bool supportPrintSchedInfo() const override { return false; }
-
bool enableEarlyIfConversion() const override;
AntiDepBreakMode getAntiDepBreakMode() const override {
diff --git a/llvm/test/CodeGen/X86/3dnow-schedule.ll b/llvm/test/CodeGen/X86/3dnow-schedule.ll
deleted file mode 100644
index eb317ebbb2a..00000000000
--- a/llvm/test/CodeGen/X86/3dnow-schedule.ll
+++ /dev/null
@@ -1,394 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+3dnowa | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-
-define void @test_femms() optsize {
-; CHECK-LABEL: test_femms:
-; CHECK: # %bb.0:
-; CHECK-NEXT: femms # sched: [31:10.33]
-; CHECK-NEXT: retq # sched: [1:1.00]
- call void @llvm.x86.mmx.femms()
- ret void
-}
-declare void @llvm.x86.mmx.femms() nounwind readnone
-
-define i64 @test_pavgusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pavgusb:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pavgusb %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pavgusb (%rdi), %mm0 # sched: [8:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pf2id(x86_mmx* %a0) optsize {
-; CHECK-LABEL: test_pf2id:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pf2id (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: pf2id %mm0, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = load x86_mmx, x86_mmx *%a0, align 8
- %2 = call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %1)
- %3 = call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone
-
-define i64 @test_pf2iw(x86_mmx* %a0) optsize {
-; CHECK-LABEL: test_pf2iw:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pf2iw (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: pf2iw %mm0, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = load x86_mmx, x86_mmx *%a0, align 8
- %2 = call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %1)
- %3 = call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone
-
-define i64 @test_pfacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pfacc:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfacc %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfacc (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pfadd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pfadd:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfadd %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfadd (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pfcmpeq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pfcmpeq:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfcmpeq %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfcmpeq (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pfcmpge(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pfcmpge:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfcmpge %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfcmpge (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pfcmpgt(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pfcmpgt:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfcmpgt %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfcmpgt (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pfmax(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pfmax:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfmax %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfmax (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pfmin(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pfmin:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfmin %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfmin (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pfmul(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pfmul:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfmul %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfmul (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pfnacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pfnacc:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfnacc %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfnacc (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pfpnacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pfpnacc:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfpnacc %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfpnacc (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pfrcp(x86_mmx* %a0) optsize {
-; CHECK-LABEL: test_pfrcp:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfrcp (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: pfrcp %mm0, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = load x86_mmx, x86_mmx *%a0, align 8
- %2 = call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %1)
- %3 = call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone
-
-define i64 @test_pfrcpit1(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pfrcpit1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfrcpit1 %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfrcpit1 (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pfrcpit2(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pfrcpit2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfrcpit2 %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfrcpit2 (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pfrsqit1(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pfrsqit1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfrsqit1 %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfrsqit1 (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pfrsqrt(x86_mmx* %a0) optsize {
-; CHECK-LABEL: test_pfrsqrt:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfrsqrt (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: pfrsqrt %mm0, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = load x86_mmx, x86_mmx *%a0, align 8
- %2 = call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %1)
- %3 = call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone
-
-define i64 @test_pfsub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pfsub:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfsub %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfsub (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pfsubr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pfsubr:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pfsubr %mm1, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: pfsubr (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pi2fd(x86_mmx* %a0) optsize {
-; CHECK-LABEL: test_pi2fd:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pi2fd (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: pi2fd %mm0, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = load x86_mmx, x86_mmx *%a0, align 8
- %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1)
- %3 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone
-
-define i64 @test_pi2fw(x86_mmx* %a0) optsize {
-; CHECK-LABEL: test_pi2fw:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pi2fw (%rdi), %mm0 # sched: [9:1.00]
-; CHECK-NEXT: pi2fw %mm0, %mm0 # sched: [3:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = load x86_mmx, x86_mmx *%a0, align 8
- %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1)
- %3 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone
-
-define i64 @test_pmulhrw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; CHECK-LABEL: test_pmulhrw:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pmulhrw %mm1, %mm0 # sched: [5:1.00]
-; CHECK-NEXT: pmulhrw (%rdi), %mm0 # sched: [10:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone
-
-define void @test_prefetch(i8* %a0) optsize {
-; CHECK-LABEL: test_prefetch:
-; CHECK: # %bb.0:
-; CHECK-NEXT: #APP
-; CHECK-NEXT: prefetch (%rdi) # sched: [5:0.50]
-; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: retq # sched: [1:1.00]
- tail call void asm sideeffect "prefetch $0", "*m"(i8 *%a0) nounwind
- ret void
-}
-
-define void @test_prefetchw(i8* %a0) optsize {
-; CHECK-LABEL: test_prefetchw:
-; CHECK: # %bb.0:
-; CHECK-NEXT: #APP
-; CHECK-NEXT: prefetchw (%rdi) # sched: [5:0.50]
-; CHECK-NEXT: #NO_APP
-; CHECK-NEXT: retq # sched: [1:1.00]
- tail call void asm sideeffect "prefetchw $0", "*m"(i8 *%a0) nounwind
- ret void
-}
-
-define i64 @test_pswapd(x86_mmx* %a0) optsize {
-; CHECK-LABEL: test_pswapd:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pswapd (%rdi), %mm0 # mm0 = mem[1,0] sched: [6:1.00]
-; CHECK-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0] sched: [1:1.00]
-; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; CHECK-NEXT: retq # sched: [1:1.00]
- %1 = load x86_mmx, x86_mmx *%a0, align 8
- %2 = call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %1)
- %3 = call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/adx-schedule.ll b/llvm/test/CodeGen/X86/adx-schedule.ll
deleted file mode 100644
index e6c65b2d214..00000000000
--- a/llvm/test/CodeGen/X86/adx-schedule.ll
+++ /dev/null
@@ -1,114 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+adx | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=SKL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define void @test_adcx(i32 %a0, i32* %a1, i64 %a2, i64* %a3) optsize {
-; GENERIC-LABEL: test_adcx:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: adcxl %edi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: adcxq %rdx, %rdx # sched: [2:0.67]
-; GENERIC-NEXT: adcxl (%rsi), %edi # sched: [7:0.67]
-; GENERIC-NEXT: adcxq (%rcx), %rdx # sched: [7:0.67]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BROADWELL-LABEL: test_adcx:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: adcxl %edi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: adcxq %rdx, %rdx # sched: [1:0.50]
-; BROADWELL-NEXT: adcxl (%rsi), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: adcxq (%rcx), %rdx # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_adcx:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: adcxl %edi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: adcxq %rdx, %rdx # sched: [1:0.50]
-; SKYLAKE-NEXT: adcxl (%rsi), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: adcxq (%rcx), %rdx # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_adcx:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: adcxl %edi, %edi # sched: [2:0.50]
-; KNL-NEXT: adcxq %rdx, %rdx # sched: [2:0.50]
-; KNL-NEXT: adcxl (%rsi), %edi # sched: [7:0.50]
-; KNL-NEXT: adcxq (%rcx), %rdx # sched: [7:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_adcx:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: adcxl %edi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: adcxq %rdx, %rdx # sched: [1:0.25]
-; ZNVER1-NEXT: adcxl (%rsi), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: adcxq (%rcx), %rdx # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "adcx $0, $0 \0A\09 adcx $2, $2 \0A\09 adcx $1, $0 \0A\09 adcx $3, $2", "r,*m,r,*m"(i32 %a0, i32* %a1, i64 %a2, i64* %a3) nounwind
- ret void
-}
-define void @test_adox(i32 %a0, i32* %a1, i64 %a2, i64* %a3) optsize {
-; GENERIC-LABEL: test_adox:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: adoxl %edi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: adoxq %rdx, %rdx # sched: [2:0.67]
-; GENERIC-NEXT: adoxl (%rsi), %edi # sched: [7:0.67]
-; GENERIC-NEXT: adoxq (%rcx), %rdx # sched: [7:0.67]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BROADWELL-LABEL: test_adox:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: adoxl %edi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: adoxq %rdx, %rdx # sched: [1:0.50]
-; BROADWELL-NEXT: adoxl (%rsi), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: adoxq (%rcx), %rdx # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_adox:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: adoxl %edi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: adoxq %rdx, %rdx # sched: [1:0.50]
-; SKYLAKE-NEXT: adoxl (%rsi), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: adoxq (%rcx), %rdx # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_adox:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: adoxl %edi, %edi # sched: [2:0.50]
-; KNL-NEXT: adoxq %rdx, %rdx # sched: [2:0.50]
-; KNL-NEXT: adoxl (%rsi), %edi # sched: [7:0.50]
-; KNL-NEXT: adoxq (%rcx), %rdx # sched: [7:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_adox:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: adoxl %edi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: adoxq %rdx, %rdx # sched: [1:0.25]
-; ZNVER1-NEXT: adoxl (%rsi), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: adoxq (%rcx), %rdx # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "adox $0, $0 \0A\09 adox $2, $2 \0A\09 adox $1, $0 \0A\09 adox $3, $2", "r,*m,r,*m"(i32 %a0, i32* %a1, i64 %a2, i64* %a3) nounwind
- ret void
-}
diff --git a/llvm/test/CodeGen/X86/aes-schedule.ll b/llvm/test/CodeGen/X86/aes-schedule.ll
deleted file mode 100644
index c622899ca09..00000000000
--- a/llvm/test/CodeGen/X86/aes-schedule.ll
+++ /dev/null
@@ -1,751 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+aes | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=+aes | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont -mattr=+aes | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,SKX-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
-
-define <2 x i64> @test_aesdec(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_aesdec:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: aesdec %xmm1, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: aesdec (%rdi), %xmm0 # sched: [13:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_aesdec:
-; SLM: # %bb.0:
-; SLM-NEXT: aesdec %xmm1, %xmm0 # sched: [8:5.00]
-; SLM-NEXT: aesdec (%rdi), %xmm0 # sched: [8:5.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_aesdec:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [13:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_aesdec:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_aesdec:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [13:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_aesdec:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_aesdec:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [12:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_aesdec:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_aesdec:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [4:1.00]
-; SKYLAKE-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [10:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_aesdec:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_aesdec:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [4:1.00]
-; SKX-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [10:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_aesdec:
-; SKX: # %bb.0:
-; SKX-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; SKX-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_aesdec:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [14:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_aesdec:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_aesdec:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_aesdec:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_aesdec:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [4:0.50]
-; ZNVER1-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [11:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_aesdec:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; ZNVER1-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <2 x i64>, <2 x i64> *%a2, align 16
- %2 = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
- %3 = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %2, <2 x i64> %1)
- ret <2 x i64> %3
-}
-declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_aesdeclast(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_aesdeclast:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: aesdeclast %xmm1, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: aesdeclast (%rdi), %xmm0 # sched: [13:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_aesdeclast:
-; SLM: # %bb.0:
-; SLM-NEXT: aesdeclast %xmm1, %xmm0 # sched: [8:5.00]
-; SLM-NEXT: aesdeclast (%rdi), %xmm0 # sched: [8:5.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_aesdeclast:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [13:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_aesdeclast:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_aesdeclast:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [13:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_aesdeclast:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_aesdeclast:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [12:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_aesdeclast:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_aesdeclast:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [4:1.00]
-; SKYLAKE-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [10:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_aesdeclast:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_aesdeclast:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [4:1.00]
-; SKX-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [10:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_aesdeclast:
-; SKX: # %bb.0:
-; SKX-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; SKX-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_aesdeclast:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [14:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_aesdeclast:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_aesdeclast:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_aesdeclast:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_aesdeclast:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [4:0.50]
-; ZNVER1-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [11:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_aesdeclast:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; ZNVER1-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <2 x i64>, <2 x i64> *%a2, align 16
- %2 = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
- %3 = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %2, <2 x i64> %1)
- ret <2 x i64> %3
-}
-declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_aesenc(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_aesenc:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: aesenc %xmm1, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: aesenc (%rdi), %xmm0 # sched: [13:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_aesenc:
-; SLM: # %bb.0:
-; SLM-NEXT: aesenc %xmm1, %xmm0 # sched: [8:5.00]
-; SLM-NEXT: aesenc (%rdi), %xmm0 # sched: [8:5.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_aesenc:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [13:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_aesenc:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_aesenc:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [13:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_aesenc:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_aesenc:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [12:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_aesenc:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_aesenc:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [4:1.00]
-; SKYLAKE-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [10:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_aesenc:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_aesenc:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [4:1.00]
-; SKX-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [10:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_aesenc:
-; SKX: # %bb.0:
-; SKX-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; SKX-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_aesenc:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [14:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_aesenc:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_aesenc:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_aesenc:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_aesenc:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [4:0.50]
-; ZNVER1-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [11:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_aesenc:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; ZNVER1-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <2 x i64>, <2 x i64> *%a2, align 16
- %2 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
- %3 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %2, <2 x i64> %1)
- ret <2 x i64> %3
-}
-declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_aesenclast(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_aesenclast:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: aesenclast %xmm1, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: aesenclast (%rdi), %xmm0 # sched: [13:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_aesenclast:
-; SLM: # %bb.0:
-; SLM-NEXT: aesenclast %xmm1, %xmm0 # sched: [8:5.00]
-; SLM-NEXT: aesenclast (%rdi), %xmm0 # sched: [8:5.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_aesenclast:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [13:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_aesenclast:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_aesenclast:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [13:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_aesenclast:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_aesenclast:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [12:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_aesenclast:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_aesenclast:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [4:1.00]
-; SKYLAKE-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [10:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_aesenclast:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_aesenclast:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [4:1.00]
-; SKX-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [10:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_aesenclast:
-; SKX: # %bb.0:
-; SKX-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; SKX-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_aesenclast:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [14:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_aesenclast:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_aesenclast:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_aesenclast:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_aesenclast:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [4:0.50]
-; ZNVER1-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [11:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_aesenclast:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; ZNVER1-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <2 x i64>, <2 x i64> *%a2, align 16
- %2 = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
- %3 = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %2, <2 x i64> %1)
- ret <2 x i64> %3
-}
-declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_aesimc(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_aesimc:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: aesimc %xmm0, %xmm1 # sched: [12:2.00]
-; GENERIC-NEXT: aesimc (%rdi), %xmm0 # sched: [18:2.00]
-; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_aesimc:
-; SLM: # %bb.0:
-; SLM-NEXT: aesimc %xmm0, %xmm1 # sched: [8:5.00]
-; SLM-NEXT: aesimc (%rdi), %xmm0 # sched: [8:5.00]
-; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_aesimc:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [12:2.00]
-; SANDY-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [18:2.00]
-; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_aesimc:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaesimc %xmm0, %xmm0 # sched: [12:2.00]
-; SANDY-NEXT: vaesimc (%rdi), %xmm1 # sched: [18:2.00]
-; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_aesimc:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [14:2.00]
-; HASWELL-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [20:2.00]
-; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_aesimc:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaesimc %xmm0, %xmm0 # sched: [14:2.00]
-; HASWELL-NEXT: vaesimc (%rdi), %xmm1 # sched: [20:2.00]
-; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_aesimc:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: aesimc (%rdi), %xmm1 # sched: [19:2.00]
-; BROADWELL-SSE-NEXT: aesimc %xmm0, %xmm0 # sched: [14:2.00]
-; BROADWELL-SSE-NEXT: por %xmm0, %xmm1 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_aesimc:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaesimc (%rdi), %xmm1 # sched: [19:2.00]
-; BROADWELL-NEXT: vaesimc %xmm0, %xmm0 # sched: [14:2.00]
-; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_aesimc:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [8:2.00]
-; SKYLAKE-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [14:2.00]
-; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_aesimc:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaesimc %xmm0, %xmm0 # sched: [8:2.00]
-; SKYLAKE-NEXT: vaesimc (%rdi), %xmm1 # sched: [14:2.00]
-; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_aesimc:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [8:2.00]
-; SKX-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [14:2.00]
-; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_aesimc:
-; SKX: # %bb.0:
-; SKX-NEXT: vaesimc %xmm0, %xmm0 # sched: [8:2.00]
-; SKX-NEXT: vaesimc (%rdi), %xmm1 # sched: [14:2.00]
-; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_aesimc:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_aesimc:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaesimc (%rdi), %xmm1 # sched: [10:1.00]
-; BDVER2-NEXT: vaesimc %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_aesimc:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_aesimc:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaesimc (%rdi), %xmm1 # sched: [7:1.00]
-; BTVER2-NEXT: vaesimc %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_aesimc:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [4:0.50]
-; ZNVER1-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [11:0.50]
-; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_aesimc:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaesimc (%rdi), %xmm1 # sched: [11:0.50]
-; ZNVER1-NEXT: vaesimc %xmm0, %xmm0 # sched: [4:0.50]
-; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <2 x i64>, <2 x i64> *%a1, align 16
- %2 = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
- %3 = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %1)
- %4 = or <2 x i64> %2, %3
- ret <2 x i64> %4
-}
-declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>)
-
-define <2 x i64> @test_aeskeygenassist(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_aeskeygenassist:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [8:3.67]
-; GENERIC-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [8:3.33]
-; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_aeskeygenassist:
-; SLM: # %bb.0:
-; SLM-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [8:5.00]
-; SLM-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [8:5.00]
-; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_aeskeygenassist:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [8:3.67]
-; SANDY-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [8:3.33]
-; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_aeskeygenassist:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [8:3.67]
-; SANDY-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [8:3.33]
-; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_aeskeygenassist:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [29:7.00]
-; HASWELL-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [34:7.00]
-; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_aeskeygenassist:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [29:7.00]
-; HASWELL-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [34:7.00]
-; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_aeskeygenassist:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [29:7.00]
-; BROADWELL-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [33:7.00]
-; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_aeskeygenassist:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [29:7.00]
-; BROADWELL-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [33:7.00]
-; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_aeskeygenassist:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [20:6.00]
-; SKYLAKE-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [25:6.00]
-; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_aeskeygenassist:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [20:6.00]
-; SKYLAKE-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [25:6.00]
-; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_aeskeygenassist:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [20:6.00]
-; SKX-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [25:6.00]
-; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_aeskeygenassist:
-; SKX: # %bb.0:
-; SKX-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [20:6.00]
-; SKX-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [25:6.00]
-; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_aeskeygenassist:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_aeskeygenassist:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [10:1.00]
-; BDVER2-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_aeskeygenassist:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_aeskeygenassist:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [7:1.00]
-; BTVER2-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_aeskeygenassist:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [4:0.50]
-; ZNVER1-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [11:0.50]
-; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_aeskeygenassist:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [11:0.50]
-; ZNVER1-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [4:0.50]
-; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <2 x i64>, <2 x i64> *%a1, align 16
- %2 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
- %3 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %1, i8 7)
- %4 = or <2 x i64> %2, %3
- ret <2 x i64> %4
-}
-declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8)
diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll
deleted file mode 100644
index caf4c979e2e..00000000000
--- a/llvm/test/CodeGen/X86/avx-schedule.ll
+++ /dev/null
@@ -1,6120 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_addpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_addpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_addpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_addpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_addpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_addpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_addpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_addpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_addpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fadd <4 x double> %a0, %a1
- %2 = load <4 x double>, <4 x double> *%a2, align 32
- %3 = fadd <4 x double> %1, %2
- ret <4 x double> %3
-}
-
-define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_addps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_addps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_addps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_addps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_addps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_addps:
-; SKX: # %bb.0:
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_addps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_addps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_addps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fadd <8 x float> %a0, %a1
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = fadd <8 x float> %1, %2
- ret <8 x float> %3
-}
-
-define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_addsubpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_addsubpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_addsubpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_addsubpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_addsubpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_addsubpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_addsubpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_addsubpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_addsubpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %a0, <4 x double> %a1)
- %2 = load <4 x double>, <4 x double> *%a2, align 32
- %3 = call <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %1, <4 x double> %2)
- ret <4 x double> %3
-}
-declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
-
-define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_addsubps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_addsubps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_addsubps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_addsubps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_addsubps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_addsubps:
-; SKX: # %bb.0:
-; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_addsubps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_addsubps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_addsubps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %a0, <8 x float> %a1)
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = call <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %1, <8 x float> %2)
- ret <8 x float> %3
-}
-declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
-
-define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_andnotpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_andnotpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_andnotpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_andnotpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_andnotpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_andnotpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_andnotpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BDVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_andnotpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
-; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_andnotpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast <4 x double> %a0 to <4 x i64>
- %2 = bitcast <4 x double> %a1 to <4 x i64>
- %3 = xor <4 x i64> %1, <i64 -1, i64 -1, i64 -1, i64 -1>
- %4 = and <4 x i64> %3, %2
- %5 = load <4 x double>, <4 x double> *%a2, align 32
- %6 = bitcast <4 x double> %5 to <4 x i64>
- %7 = xor <4 x i64> %4, <i64 -1, i64 -1, i64 -1, i64 -1>
- %8 = and <4 x i64> %6, %7
- %9 = bitcast <4 x i64> %8 to <4 x double>
- %10 = fadd <4 x double> %a1, %9
- ret <4 x double> %10
-}
-
-define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_andnotps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_andnotps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_andnotps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_andnotps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_andnotps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_andnotps:
-; SKX: # %bb.0:
-; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_andnotps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BDVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_andnotps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_andnotps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast <8 x float> %a0 to <4 x i64>
- %2 = bitcast <8 x float> %a1 to <4 x i64>
- %3 = xor <4 x i64> %1, <i64 -1, i64 -1, i64 -1, i64 -1>
- %4 = and <4 x i64> %3, %2
- %5 = load <8 x float>, <8 x float> *%a2, align 32
- %6 = bitcast <8 x float> %5 to <4 x i64>
- %7 = xor <4 x i64> %4, <i64 -1, i64 -1, i64 -1, i64 -1>
- %8 = and <4 x i64> %6, %7
- %9 = bitcast <4 x i64> %8 to <8 x float>
- %10 = fadd <8 x float> %a1, %9
- ret <8 x float> %10
-}
-
-define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_andpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_andpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_andpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_andpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_andpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_andpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_andpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BDVER2-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_andpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
-; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_andpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast <4 x double> %a0 to <4 x i64>
- %2 = bitcast <4 x double> %a1 to <4 x i64>
- %3 = and <4 x i64> %1, %2
- %4 = load <4 x double>, <4 x double> *%a2, align 32
- %5 = bitcast <4 x double> %4 to <4 x i64>
- %6 = and <4 x i64> %3, %5
- %7 = bitcast <4 x i64> %6 to <4 x double>
- %8 = fadd <4 x double> %a1, %7
- ret <4 x double> %8
-}
-
-define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_andps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_andps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_andps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_andps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_andps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_andps:
-; SKX: # %bb.0:
-; SKX-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_andps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BDVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_andps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_andps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast <8 x float> %a0 to <4 x i64>
- %2 = bitcast <8 x float> %a1 to <4 x i64>
- %3 = and <4 x i64> %1, %2
- %4 = load <8 x float>, <8 x float> *%a2, align 32
- %5 = bitcast <8 x float> %4 to <4 x i64>
- %6 = and <4 x i64> %3, %5
- %7 = bitcast <4 x i64> %6 to <8 x float>
- %8 = fadd <8 x float> %a1, %7
- ret <8 x float> %8
-}
-
-define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_blendpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
-; GENERIC-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50]
-; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_blendpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
-; SANDY-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50]
-; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_blendpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
-; HASWELL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50]
-; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blendpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
-; BROADWELL-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [7:0.50]
-; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blendpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
-; SKYLAKE-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50]
-; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_blendpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33]
-; SKX-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_blendpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [2:1.00]
-; BDVER2-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [7:1.00]
-; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_blendpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00]
-; BTVER2-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [6:2.00]
-; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_blendpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50]
-; ZNVER1-NEXT: vblendpd {{.*#+}} ymm1 = ymm0[0,1],mem[2,3] sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
- %2 = load <4 x double>, <4 x double> *%a2, align 32
- %3 = shufflevector <4 x double> %1, <4 x double> %2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
- %4 = fadd <4 x double> %1, %3
- ret <4 x double> %4
-}
-
-define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_blendps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
-; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_blendps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
-; SANDY-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
-; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_blendps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
-; HASWELL-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
-; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blendps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
-; BROADWELL-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [7:0.50]
-; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blendps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
-; SKYLAKE-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
-; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_blendps:
-; SKX: # %bb.0:
-; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33]
-; SKX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_blendps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [2:1.00]
-; BDVER2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [7:1.00]
-; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_blendps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00]
-; BTVER2-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [6:2.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_blendps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50]
-; ZNVER1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50]
-; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = shufflevector <8 x float> %a1, <8 x float> %2, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 13, i32 14, i32 7>
- %4 = fadd <8 x float> %1, %3
- ret <8 x float> %4
-}
-
-define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) {
-; GENERIC-LABEL: test_blendvpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; GENERIC-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_blendvpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; SANDY-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_blendvpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; HASWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blendvpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BROADWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blendvpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
-; SKYLAKE-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_blendvpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
-; SKX-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_blendvpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:3.00]
-; BDVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:3.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_blendvpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
-; BTVER2-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_blendvpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
- %2 = load <4 x double>, <4 x double> *%a3, align 32
- %3 = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %1, <4 x double> %2, <4 x double> %a2)
- ret <4 x double> %3
-}
-declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
-
-define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) {
-; GENERIC-LABEL: test_blendvps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; GENERIC-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_blendvps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; SANDY-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_blendvps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; HASWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blendvps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BROADWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blendvps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
-; SKYLAKE-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_blendvps:
-; SKX: # %bb.0:
-; SKX-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
-; SKX-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_blendvps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:3.00]
-; BDVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:3.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_blendvps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
-; BTVER2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_blendvps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
- %2 = load <8 x float>, <8 x float> *%a3, align 32
- %3 = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %1, <8 x float> %2, <8 x float> %a2)
- ret <8 x float> %3
-}
-declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
-
-define <8 x float> @test_broadcastf128(<4 x float> *%a0) {
-; GENERIC-LABEL: test_broadcastf128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_broadcastf128:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_broadcastf128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_broadcastf128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_broadcastf128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_broadcastf128:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_broadcastf128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_broadcastf128:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_broadcastf128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <4 x float>, <4 x float> *%a0, align 32
- %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
- ret <8 x float> %2
-}
-
-define <4 x double> @test_broadcastsd_ymm(double *%a0) {
-; GENERIC-LABEL: test_broadcastsd_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_broadcastsd_ymm:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_broadcastsd_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_broadcastsd_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_broadcastsd_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_broadcastsd_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_broadcastsd_ymm:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_broadcastsd_ymm:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [6:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_broadcastsd_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load double, double *%a0, align 8
- %2 = insertelement <4 x double> undef, double %1, i32 0
- %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> zeroinitializer
- ret <4 x double> %3
-}
-
-define <4 x float> @test_broadcastss(float *%a0) {
-; GENERIC-LABEL: test_broadcastss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_broadcastss:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_broadcastss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_broadcastss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_broadcastss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_broadcastss:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_broadcastss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_broadcastss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_broadcastss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load float, float *%a0, align 4
- %2 = insertelement <4 x float> undef, float %1, i32 0
- %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> zeroinitializer
- ret <4 x float> %3
-}
-
-define <8 x float> @test_broadcastss_ymm(float *%a0) {
-; GENERIC-LABEL: test_broadcastss_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_broadcastss_ymm:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_broadcastss_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_broadcastss_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_broadcastss_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_broadcastss_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_broadcastss_ymm:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_broadcastss_ymm:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [6:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_broadcastss_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load float, float *%a0, align 4
- %2 = insertelement <8 x float> undef, float %1, i32 0
- %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> zeroinitializer
- ret <8 x float> %3
-}
-
-define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_cmppd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; GENERIC-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; GENERIC-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cmppd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SANDY-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cmppd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; HASWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; HASWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmppd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; BROADWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; BROADWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmppd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.50]
-; SKYLAKE-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmppd:
-; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.50]
-; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cmppd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
-; BDVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
-; BDVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [2:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cmppd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cmppd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; ZNVER1-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; ZNVER1-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fcmp oeq <4 x double> %a0, %a1
- %2 = load <4 x double>, <4 x double> *%a2, align 32
- %3 = fcmp oeq <4 x double> %a0, %2
- %4 = sext <4 x i1> %1 to <4 x i64>
- %5 = sext <4 x i1> %3 to <4 x i64>
- %6 = or <4 x i64> %4, %5
- %7 = bitcast <4 x i64> %6 to <4 x double>
- ret <4 x double> %7
-}
-
-define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_cmpps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; GENERIC-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; GENERIC-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cmpps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SANDY-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cmpps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; HASWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; HASWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmpps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; BROADWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; BROADWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmpps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.50]
-; SKYLAKE-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmpps:
-; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.50]
-; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cmpps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
-; BDVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
-; BDVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [2:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cmpps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cmpps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; ZNVER1-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; ZNVER1-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fcmp oeq <8 x float> %a0, %a1
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = fcmp oeq <8 x float> %a0, %2
- %4 = sext <8 x i1> %1 to <8 x i32>
- %5 = sext <8 x i1> %3 to <8 x i32>
- %6 = or <8 x i32> %4, %5
- %7 = bitcast <8 x i32> %6 to <8 x float>
- ret <8 x float> %7
-}
-
-define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
-; GENERIC-LABEL: test_cvtdq2pd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00]
-; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtdq2pd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
-; SANDY-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00]
-; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cvtdq2pd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00]
-; HASWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [12:1.00]
-; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtdq2pd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [11:1.00]
-; BROADWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00]
-; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtdq2pd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00]
-; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtdq2pd:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
-; SKX-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cvtdq2pd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:2.00]
-; BDVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [8:2.00]
-; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cvtdq2pd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:2.00]
-; BTVER2-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cvtdq2pd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [12:1.00]
-; ZNVER1-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sitofp <4 x i32> %a0 to <4 x double>
- %2 = load <4 x i32>, <4 x i32> *%a1, align 16
- %3 = sitofp <4 x i32> %2 to <4 x double>
- %4 = fadd <4 x double> %1, %3
- ret <4 x double> %4
-}
-
-define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) {
-; GENERIC-LABEL: test_cvtdq2ps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00]
-; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtdq2ps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
-; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:0.50]
-; SANDY-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cvtdq2ps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00]
-; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtdq2ps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [9:1.00]
-; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtdq2ps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50]
-; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtdq2ps:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cvtdq2ps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [9:2.00]
-; BDVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:2.00]
-; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cvtdq2ps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:2.00]
-; BTVER2-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cvtdq2ps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [12:1.00]
-; ZNVER1-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sitofp <8 x i32> %a0 to <8 x float>
- %2 = load <8 x i32>, <8 x i32> *%a1, align 16
- %3 = sitofp <8 x i32> %2 to <8 x float>
- %4 = fadd <8 x float> %1, %3
- ret <8 x float> %4
-}
-
-define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) {
-; GENERIC-LABEL: test_cvtpd2dq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
-; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtpd2dq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
-; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cvtpd2dq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:1.00]
-; HASWELL-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
-; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtpd2dq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
-; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtpd2dq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
-; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtpd2dq:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:0.50]
-; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cvtpd2dq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [13:2.00]
-; BDVER2-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [8:2.00]
-; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cvtpd2dq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:2.00]
-; BTVER2-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:2.00]
-; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cvtpd2dq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %a0)
- %2 = load <4 x double>, <4 x double> *%a1, align 32
- %3 = call <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double> %2)
- %4 = shufflevector <4 x i32> %1, <4 x i32> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i32> %4
-}
-declare <4 x i32> @llvm.x86.avx.cvt.pd2dq.256(<4 x double>) nounwind readnone
-
-define <8 x i32> @test_cvttpd2dq(<4 x double> %a0, <4 x double> *%a1) {
-; GENERIC-LABEL: test_cvttpd2dq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
-; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvttpd2dq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00]
-; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cvttpd2dq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00]
-; HASWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
-; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvttpd2dq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
-; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvttpd2dq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00]
-; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvttpd2dq:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:0.50]
-; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cvttpd2dq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [13:2.00]
-; BDVER2-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [8:2.00]
-; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cvttpd2dq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:2.00]
-; BTVER2-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:2.00]
-; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cvttpd2dq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fptosi <4 x double> %a0 to <4 x i32>
- %2 = load <4 x double>, <4 x double> *%a1, align 32
- %3 = fptosi <4 x double> %2 to <4 x i32>
- %4 = shufflevector <4 x i32> %1, <4 x i32> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i32> %4
-}
-
-define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) {
-; GENERIC-LABEL: test_cvtpd2ps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
-; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtpd2ps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
-; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cvtpd2ps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00]
-; HASWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
-; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtpd2ps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
-; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtpd2ps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
-; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtpd2ps:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [8:1.00]
-; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cvtpd2ps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [13:2.00]
-; BDVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [8:2.00]
-; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cvtpd2ps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:2.00]
-; BTVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:2.00]
-; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cvtpd2ps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00]
-; ZNVER1-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [2:0.67]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fptrunc <4 x double> %a0 to <4 x float>
- %2 = load <4 x double>, <4 x double> *%a1, align 32
- %3 = fptrunc <4 x double> %2 to <4 x float>
- %4 = shufflevector <4 x float> %1, <4 x float> %3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x float> %4
-}
-
-define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) {
-; GENERIC-LABEL: test_cvtps2dq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00]
-; GENERIC-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtps2dq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00]
-; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cvtps2dq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [10:1.00]
-; HASWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtps2dq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [9:1.00]
-; BROADWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtps2dq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50]
-; SKYLAKE-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtps2dq:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50]
-; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cvtps2dq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [9:2.00]
-; BDVER2-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:2.00]
-; BDVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cvtps2dq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [8:2.00]
-; BTVER2-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cvtps2dq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [12:1.00]
-; ZNVER1-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %a0)
- %2 = load <8 x float>, <8 x float> *%a1, align 32
- %3 = call <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float> %2)
- %4 = or <8 x i32> %1, %3
- ret <8 x i32> %4
-}
-declare <8 x i32> @llvm.x86.avx.cvt.ps2dq.256(<8 x float>) nounwind readnone
-
-define <8 x i32> @test_cvttps2dq(<8 x float> %a0, <8 x float> *%a1) {
-; GENERIC-LABEL: test_cvttps2dq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00]
-; GENERIC-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvttps2dq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00]
-; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cvttps2dq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [10:1.00]
-; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvttps2dq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [9:1.00]
-; BROADWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvttps2dq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50]
-; SKYLAKE-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvttps2dq:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:0.50]
-; SKX-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50]
-; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cvttps2dq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [9:2.00]
-; BDVER2-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:2.00]
-; BDVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cvttps2dq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [8:2.00]
-; BTVER2-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cvttps2dq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [12:1.00]
-; ZNVER1-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fptosi <8 x float> %a0 to <8 x i32>
- %2 = load <8 x float>, <8 x float> *%a1, align 32
- %3 = fptosi <8 x float> %2 to <8 x i32>
- %4 = or <8 x i32> %1, %3
- ret <8 x i32> %4
-}
-
-define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_divpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:44.00]
-; GENERIC-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:44.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_divpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:44.00]
-; SANDY-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:44.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_divpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [35:28.00]
-; HASWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [42:28.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_divpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [23:16.00]
-; BROADWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [29:16.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_divpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [14:5.00]
-; SKYLAKE-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [21:8.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_divpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [14:5.00]
-; SKX-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [21:8.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_divpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [9:19.00]
-; BDVER2-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [14:19.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_divpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [38:38.00]
-; BTVER2-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [43:38.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_divpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [15:15.00]
-; ZNVER1-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [22:22.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fdiv <4 x double> %a0, %a1
- %2 = load <4 x double>, <4 x double> *%a2, align 32
- %3 = fdiv <4 x double> %1, %2
- ret <4 x double> %3
-}
-
-define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_divps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:28.00]
-; GENERIC-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:28.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_divps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:28.00]
-; SANDY-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:28.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_divps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [21:14.00]
-; HASWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [28:14.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_divps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [17:10.00]
-; BROADWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [23:10.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_divps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [11:5.00]
-; SKYLAKE-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [18:5.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_divps:
-; SKX: # %bb.0:
-; SKX-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [11:5.00]
-; SKX-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [18:5.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_divps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [9:19.00]
-; BDVER2-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [14:19.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_divps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [38:38.00]
-; BTVER2-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [43:38.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_divps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [12:12.00]
-; ZNVER1-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [19:19.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fdiv <8 x float> %a0, %a1
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = fdiv <8 x float> %1, %2
- ret <8 x float> %3
-}
-
-define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_dpps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00]
-; GENERIC-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [19:2.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_dpps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00]
-; SANDY-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [19:2.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_dpps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [14:2.00]
-; HASWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [21:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_dpps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [14:2.00]
-; BROADWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_dpps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [13:1.50]
-; SKYLAKE-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:1.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_dpps:
-; SKX: # %bb.0:
-; SKX-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [13:1.33]
-; SKX-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [20:1.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_dpps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [27:3.00]
-; BDVER2-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [32:3.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_dpps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:6.00]
-; BTVER2-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [17:6.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_dpps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7)
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %1, <8 x float> %2, i8 7)
- ret <8 x float> %3
-}
-declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
-
-define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_extractf128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_extractf128:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT: vzeroupper # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_extractf128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_extractf128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_extractf128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_extractf128:
-; SKX: # %bb.0:
-; SKX-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00]
-; SKX-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_extractf128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [7:0.50]
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_extractf128:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_extractf128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:0.33]
-; ZNVER1-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [8:0.50]
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x float> %a0, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- %2 = shufflevector <8 x float> %a1, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- store <4 x float> %2, <4 x float> *%a2
- ret <4 x float> %1
-}
-
-define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_haddpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; GENERIC-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_haddpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; SANDY-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_haddpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; HASWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_haddpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BROADWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_haddpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
-; SKYLAKE-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_haddpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
-; SKX-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_haddpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [11:2.00]
-; BDVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [16:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_haddpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
-; BTVER2-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_haddpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %a0, <4 x double> %a1)
- %2 = load <4 x double>, <4 x double> *%a2, align 32
- %3 = call <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double> %1, <4 x double> %2)
- ret <4 x double> %3
-}
-declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone
-
-define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_haddps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; GENERIC-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_haddps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; SANDY-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_haddps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; HASWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_haddps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BROADWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_haddps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
-; SKYLAKE-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_haddps:
-; SKX: # %bb.0:
-; SKX-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
-; SKX-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_haddps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [11:2.00]
-; BDVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [16:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_haddps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
-; BTVER2-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_haddps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %a0, <8 x float> %a1)
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %1, <8 x float> %2)
- ret <8 x float> %3
-}
-declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone
-
-define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_hsubpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; GENERIC-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_hsubpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; SANDY-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_hsubpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; HASWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_hsubpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BROADWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_hsubpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
-; SKYLAKE-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_hsubpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
-; SKX-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_hsubpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [11:2.00]
-; BDVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [16:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_hsubpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
-; BTVER2-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_hsubpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %a0, <4 x double> %a1)
- %2 = load <4 x double>, <4 x double> *%a2, align 32
- %3 = call <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double> %1, <4 x double> %2)
- ret <4 x double> %3
-}
-declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone
-
-define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_hsubps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; GENERIC-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_hsubps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; SANDY-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_hsubps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; HASWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_hsubps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BROADWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_hsubps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
-; SKYLAKE-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_hsubps:
-; SKX: # %bb.0:
-; SKX-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [6:2.00]
-; SKX-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_hsubps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [11:2.00]
-; BDVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [16:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_hsubps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
-; BTVER2-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_hsubps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %a0, <8 x float> %a1)
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %1, <8 x float> %2)
- ret <8 x float> %3
-}
-declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone
-
-define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_insertf128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_insertf128:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
-; SANDY-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_insertf128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
-; HASWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_insertf128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
-; BROADWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_insertf128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
-; SKYLAKE-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_insertf128:
-; SKX: # %bb.0:
-; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
-; SKX-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_insertf128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.50]
-; BDVER2-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_insertf128:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
-; BTVER2-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_insertf128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.67]
-; ZNVER1-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [9:0.67]
-; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x float> %a1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
- %2 = shufflevector <8 x float> %a0, <8 x float> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
- %3 = load <4 x float>, <4 x float> *%a2, align 16
- %4 = shufflevector <4 x float> %3, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
- %5 = shufflevector <8 x float> %a0, <8 x float> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
- %6 = fadd <8 x float> %2, %5
- ret <8 x float> %6
-}
-
-define <32 x i8> @test_lddqu(i8* %a0) {
-; GENERIC-LABEL: test_lddqu:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_lddqu:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lddqu:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lddqu:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lddqu:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_lddqu:
-; SKX: # %bb.0:
-; SKX-NEXT: vlddqu (%rdi), %ymm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lddqu:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vlddqu (%rdi), %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lddqu:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vlddqu (%rdi), %ymm0 # sched: [5:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lddqu:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vlddqu (%rdi), %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <32 x i8> @llvm.x86.avx.ldu.dq.256(i8* %a0)
- ret <32 x i8> %1
-}
-declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly
-
-define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) {
-; GENERIC-LABEL: test_maskmovpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
-; GENERIC-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_maskmovpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
-; SANDY-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_maskmovpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
-; HASWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; HASWELL-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_maskmovpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
-; BROADWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; BROADWELL-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_maskmovpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
-; SKYLAKE-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_maskmovpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
-; SKX-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_maskmovpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [6:1.00]
-; BDVER2-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [6:2.00]
-; BDVER2-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_maskmovpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [6:1.00]
-; BTVER2-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [6:2.00]
-; BTVER2-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_maskmovpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:0.50]
-; ZNVER1-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [4:0.50]
-; ZNVER1-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.avx.maskload.pd(i8* %a0, <2 x i64> %a1)
- call void @llvm.x86.avx.maskstore.pd(i8* %a0, <2 x i64> %a1, <2 x double> %a2)
- ret <2 x double> %1
-}
-declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly
-declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind
-
-define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2) {
-; GENERIC-LABEL: test_maskmovpd_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
-; GENERIC-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_maskmovpd_ymm:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
-; SANDY-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
-; SANDY-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_maskmovpd_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [9:2.00]
-; HASWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
-; HASWELL-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_maskmovpd_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
-; BROADWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
-; BROADWELL-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_maskmovpd_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
-; SKYLAKE-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_maskmovpd_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
-; SKX-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
-; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_maskmovpd_ymm:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [6:2.00]
-; BDVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [6:2.00]
-; BDVER2-NEXT: vmovapd %ymm2, %ymm0 # sched: [2:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_maskmovpd_ymm:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [6:2.00]
-; BTVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [6:2.00]
-; BTVER2-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_maskmovpd_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [8:1.00]
-; ZNVER1-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
-; ZNVER1-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %a1)
- call void @llvm.x86.avx.maskstore.pd.256(i8* %a0, <4 x i64> %a1, <4 x double> %a2)
- ret <4 x double> %1
-}
-declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind readonly
-declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind
-
-define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) {
-; GENERIC-LABEL: test_maskmovps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
-; GENERIC-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_maskmovps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
-; SANDY-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_maskmovps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
-; HASWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; HASWELL-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_maskmovps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
-; BROADWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; BROADWELL-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_maskmovps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
-; SKYLAKE-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_maskmovps:
-; SKX: # %bb.0:
-; SKX-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
-; SKX-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_maskmovps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [6:1.00]
-; BDVER2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [6:2.00]
-; BDVER2-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_maskmovps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [6:1.00]
-; BTVER2-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [6:2.00]
-; BTVER2-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_maskmovps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:0.50]
-; ZNVER1-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [4:0.50]
-; ZNVER1-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.avx.maskload.ps(i8* %a0, <4 x i32> %a1)
- call void @llvm.x86.avx.maskstore.ps(i8* %a0, <4 x i32> %a1, <4 x float> %a2)
- ret <4 x float> %1
-}
-declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly
-declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind
-
-define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2) {
-; GENERIC-LABEL: test_maskmovps_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
-; GENERIC-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_maskmovps_ymm:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
-; SANDY-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
-; SANDY-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_maskmovps_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [9:2.00]
-; HASWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
-; HASWELL-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_maskmovps_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
-; BROADWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
-; BROADWELL-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_maskmovps_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
-; SKYLAKE-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_maskmovps_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
-; SKX-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
-; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_maskmovps_ymm:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [6:2.00]
-; BDVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [6:2.00]
-; BDVER2-NEXT: vmovaps %ymm2, %ymm0 # sched: [2:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_maskmovps_ymm:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [6:2.00]
-; BTVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [6:2.00]
-; BTVER2-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_maskmovps_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [8:1.00]
-; ZNVER1-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
-; ZNVER1-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %a1)
- call void @llvm.x86.avx.maskstore.ps.256(i8* %a0, <8 x i32> %a1, <8 x float> %a2)
- ret <8 x float> %1
-}
-declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind readonly
-declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind
-
-define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_maxpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_maxpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_maxpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_maxpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_maxpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_maxpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_maxpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BDVER2-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_maxpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_maxpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %a0, <4 x double> %a1)
- %2 = load <4 x double>, <4 x double> *%a2, align 32
- %3 = call <4 x double> @llvm.x86.avx.max.pd.256(<4 x double> %1, <4 x double> %2)
- ret <4 x double> %3
-}
-declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone
-
-define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_maxps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_maxps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_maxps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_maxps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_maxps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_maxps:
-; SKX: # %bb.0:
-; SKX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_maxps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BDVER2-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_maxps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_maxps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a0, <8 x float> %a1)
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %1, <8 x float> %2)
- ret <8 x float> %3
-}
-declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone
-
-define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_minpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_minpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_minpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_minpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_minpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_minpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_minpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BDVER2-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_minpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_minpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %a0, <4 x double> %a1)
- %2 = load <4 x double>, <4 x double> *%a2, align 32
- %3 = call <4 x double> @llvm.x86.avx.min.pd.256(<4 x double> %1, <4 x double> %2)
- ret <4 x double> %3
-}
-declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone
-
-define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_minps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_minps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_minps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_minps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_minps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_minps:
-; SKX: # %bb.0:
-; SKX-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_minps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BDVER2-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_minps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_minps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %a0, <8 x float> %a1)
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = call <8 x float> @llvm.x86.avx.min.ps.256(<8 x float> %1, <8 x float> %2)
- ret <8 x float> %3
-}
-declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone
-
-define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) {
-; GENERIC-LABEL: test_movapd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
-; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movapd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
-; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movapd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
-; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movapd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movapd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movapd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50]
-; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movapd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovapd (%rdi), %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movapd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovapd (%rdi), %ymm0 # sched: [5:1.00]
-; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movapd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovapd (%rdi), %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <4 x double>, <4 x double> *%a0, align 32
- %2 = fadd <4 x double> %1, %1
- store <4 x double> %2, <4 x double> *%a1, align 32
- ret <4 x double> %2
-}
-
-define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) {
-; GENERIC-LABEL: test_movaps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
-; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movaps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
-; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movaps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
-; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movaps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movaps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movaps:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50]
-; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movaps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovaps (%rdi), %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movaps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps (%rdi), %ymm0 # sched: [5:1.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movaps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovaps (%rdi), %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <8 x float>, <8 x float> *%a0, align 32
- %2 = fadd <8 x float> %1, %1
- store <8 x float> %2, <8 x float> *%a1, align 32
- ret <8 x float> %2
-}
-
-define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) {
-; GENERIC-LABEL: test_movddup:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
-; GENERIC-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
-; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movddup:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
-; SANDY-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
-; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movddup:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
-; HASWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
-; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movddup:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
-; BROADWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:0.50]
-; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movddup:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
-; SKYLAKE-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
-; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movddup:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
-; SKX-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movddup:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:1.00]
-; BDVER2-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [2:1.00]
-; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movddup:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [6:2.00]
-; BTVER2-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00]
-; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movddup:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [8:0.50]
-; ZNVER1-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:0.50]
-; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
- %2 = load <4 x double>, <4 x double> *%a1, align 32
- %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
- %4 = fadd <4 x double> %1, %3
- ret <4 x double> %4
-}
-
-define i32 @test_movmskpd(<4 x double> %a0) {
-; GENERIC-LABEL: test_movmskpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movmskpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00]
-; SANDY-NEXT: vzeroupper # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movmskpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00]
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movmskpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movmskpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00]
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movmskpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movmskpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [10:1.00]
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movmskpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movmskpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovmskpd %ymm0, %eax # sched: [1:1.00]
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
- ret i32 %1
-}
-declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone
-
-define i32 @test_movmskps(<8 x float> %a0) {
-; GENERIC-LABEL: test_movmskps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movmskps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00]
-; SANDY-NEXT: vzeroupper # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movmskps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00]
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movmskps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movmskps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00]
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movmskps:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movmskps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovmskps %ymm0, %eax # sched: [10:1.00]
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movmskps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movmskps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovmskps %ymm0, %eax # sched: [1:1.00]
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
- ret i32 %1
-}
-declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
-
-define void @test_movntdq(<4 x i64> %a0, <4 x i64> *%a1) {
-; GENERIC-LABEL: test_movntdq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movntdq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: vzeroupper # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movntdq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movntdq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movntdq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movntdq:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movntdq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vmovntdq %ymm0, (%rdi) # sched: [2:2.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movntdq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: vmovntdq %ymm0, (%rdi) # sched: [2:2.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movntdq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "vmovntdq $0, $1", "x,*m"(<4 x i64> %a0, <4 x i64> *%a1)
- ret void
-}
-
-define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) {
-; GENERIC-LABEL: test_movntpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movntpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movntpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movntpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movntpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movntpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movntpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vmovntpd %ymm0, (%rdi) # sched: [3:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movntpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmovntpd %ymm0, (%rdi) # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movntpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fadd <4 x double> %a0, %a0
- store <4 x double> %1, <4 x double> *%a1, align 32, !nontemporal !0
- ret <4 x double> %1
-}
-
-define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) {
-; GENERIC-LABEL: test_movntps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movntps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movntps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movntps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movntps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movntps:
-; SKX: # %bb.0:
-; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movntps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vmovntps %ymm0, (%rdi) # sched: [3:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movntps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmovntps %ymm0, (%rdi) # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movntps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fadd <8 x float> %a0, %a0
- store <8 x float> %1, <8 x float> *%a1, align 32, !nontemporal !0
- ret <8 x float> %1
-}
-
-define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) {
-; GENERIC-LABEL: test_movshdup:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
-; GENERIC-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
-; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movshdup:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
-; SANDY-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
-; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movshdup:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
-; HASWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
-; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movshdup:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
-; BROADWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [6:0.50]
-; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movshdup:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
-; SKYLAKE-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
-; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movshdup:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
-; SKX-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movshdup:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:1.00]
-; BDVER2-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [2:1.00]
-; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movshdup:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [6:2.00]
-; BTVER2-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movshdup:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [8:0.50]
-; ZNVER1-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:0.50]
-; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
- %2 = load <8 x float>, <8 x float> *%a1, align 32
- %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
- %4 = fadd <8 x float> %1, %3
- ret <8 x float> %4
-}
-
-define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) {
-; GENERIC-LABEL: test_movsldup:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
-; GENERIC-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
-; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movsldup:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
-; SANDY-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
-; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movsldup:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
-; HASWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
-; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movsldup:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
-; BROADWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [6:0.50]
-; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movsldup:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
-; SKYLAKE-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
-; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movsldup:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
-; SKX-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movsldup:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:1.00]
-; BDVER2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [2:1.00]
-; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movsldup:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [6:2.00]
-; BTVER2-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movsldup:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [8:0.50]
-; ZNVER1-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:0.50]
-; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
- %2 = load <8 x float>, <8 x float> *%a1, align 32
- %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
- %4 = fadd <8 x float> %1, %3
- ret <8 x float> %4
-}
-
-define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) {
-; GENERIC-LABEL: test_movupd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50]
-; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movupd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00]
-; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movupd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50]
-; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movupd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movupd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movupd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50]
-; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movupd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovupd (%rdi), %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movupd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovupd (%rdi), %ymm0 # sched: [5:1.00]
-; BTVER2-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movupd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovupd (%rdi), %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <4 x double>, <4 x double> *%a0, align 1
- %2 = fadd <4 x double> %1, %1
- store <4 x double> %2, <4 x double> *%a1, align 1
- ret <4 x double> %2
-}
-
-define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) {
-; GENERIC-LABEL: test_movups:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50]
-; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movups:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00]
-; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movups:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50]
-; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movups:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movups:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movups:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50]
-; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movups:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovups (%rdi), %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movups:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovups (%rdi), %ymm0 # sched: [5:1.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movups:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovups (%rdi), %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovups %ymm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <8 x float>, <8 x float> *%a0, align 1
- %2 = fadd <8 x float> %1, %1
- store <8 x float> %2, <8 x float> *%a1, align 1
- ret <8 x float> %2
-}
-
-define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_mulpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_mulpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_mulpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mulpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [3:0.50]
-; BROADWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mulpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_mulpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_mulpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_mulpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:4.00]
-; BTVER2-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:4.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_mulpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; ZNVER1-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fmul <4 x double> %a0, %a1
- %2 = load <4 x double>, <4 x double> *%a2, align 32
- %3 = fmul <4 x double> %1, %2
- ret <4 x double> %3
-}
-
-define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_mulps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_mulps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_mulps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mulps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [3:0.50]
-; BROADWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mulps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_mulps:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_mulps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_mulps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_mulps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; ZNVER1-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fmul <8 x float> %a0, %a1
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = fmul <8 x float> %1, %2
- ret <8 x float> %3
-}
-
-define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: orpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: orpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: orpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: orpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: orpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: orpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: orpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BDVER2-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: orpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
-; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: orpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast <4 x double> %a0 to <4 x i64>
- %2 = bitcast <4 x double> %a1 to <4 x i64>
- %3 = or <4 x i64> %1, %2
- %4 = load <4 x double>, <4 x double> *%a2, align 32
- %5 = bitcast <4 x double> %4 to <4 x i64>
- %6 = or <4 x i64> %3, %5
- %7 = bitcast <4 x i64> %6 to <4 x double>
- %8 = fadd <4 x double> %a1, %7
- ret <4 x double> %8
-}
-
-define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_orps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_orps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_orps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_orps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_orps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_orps:
-; SKX: # %bb.0:
-; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_orps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BDVER2-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_orps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_orps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast <8 x float> %a0 to <4 x i64>
- %2 = bitcast <8 x float> %a1 to <4 x i64>
- %3 = or <4 x i64> %1, %2
- %4 = load <8 x float>, <8 x float> *%a2, align 32
- %5 = bitcast <8 x float> %4 to <4 x i64>
- %6 = or <4 x i64> %3, %5
- %7 = bitcast <4 x i64> %6 to <8 x float>
- %8 = fadd <8 x float> %a1, %7
- ret <8 x float> %8
-}
-
-define <4 x double> @test_perm2f128(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_perm2f128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
-; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_perm2f128:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; SANDY-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
-; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_perm2f128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; HASWELL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_perm2f128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; BROADWELL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:1.00]
-; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_perm2f128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKYLAKE-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_perm2f128:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_perm2f128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [4:0.50]
-; BDVER2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:0.50]
-; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_perm2f128:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; BTVER2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [6:1.00]
-; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_perm2f128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [100:0.25]
-; ZNVER1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [100:0.25]
-; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %2 = load <4 x double>, <4 x double> *%a2, align 32
- %3 = shufflevector <4 x double> %a0, <4 x double> %2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %4 = fadd <4 x double> %1, %3
- ret <4 x double> %4
-}
-
-define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_permilpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
-; GENERIC-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
-; GENERIC-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_permilpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
-; SANDY-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
-; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_permilpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
-; HASWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
-; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_permilpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
-; BROADWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [6:1.00]
-; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_permilpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
-; SKYLAKE-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
-; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_permilpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00]
-; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_permilpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:0.50]
-; BDVER2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [2:0.50]
-; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_permilpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [6:1.00]
-; BTVER2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:0.50]
-; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_permilpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [8:0.50]
-; ZNVER1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:0.50]
-; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> <i32 1, i32 0>
- %2 = load <2 x double>, <2 x double> *%a1, align 16
- %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> <i32 1, i32 0>
- %4 = fadd <2 x double> %1, %3
- ret <2 x double> %4
-}
-
-define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) {
-; GENERIC-LABEL: test_permilpd_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
-; GENERIC-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
-; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_permilpd_ymm:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
-; SANDY-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
-; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_permilpd_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
-; HASWELL-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
-; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_permilpd_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
-; BROADWELL-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [7:1.00]
-; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_permilpd_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
-; SKYLAKE-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
-; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_permilpd_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
-; SKX-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_permilpd_ymm:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [7:1.00]
-; BDVER2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [2:1.00]
-; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_permilpd_ymm:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [6:2.00]
-; BTVER2-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00]
-; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_permilpd_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:0.50]
-; ZNVER1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:0.50]
-; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
- %2 = load <4 x double>, <4 x double> *%a1, align 32
- %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
- %4 = fadd <4 x double> %1, %3
- ret <4 x double> %4
-}
-
-define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_permilps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
-; GENERIC-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
-; GENERIC-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_permilps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
-; SANDY-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_permilps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
-; HASWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_permilps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
-; BROADWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_permilps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
-; SKYLAKE-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_permilps:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00]
-; SKX-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_permilps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50]
-; BDVER2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [2:0.50]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_permilps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
-; BTVER2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:0.50]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_permilps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [8:0.50]
-; ZNVER1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:0.50]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
- %2 = load <4 x float>, <4 x float> *%a1, align 16
- %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
- %4 = fadd <4 x float> %1, %3
- ret <4 x float> %4
-}
-
-define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) {
-; GENERIC-LABEL: test_permilps_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
-; GENERIC-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
-; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_permilps_ymm:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
-; SANDY-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_permilps_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
-; HASWELL-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
-; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_permilps_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
-; BROADWELL-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [7:1.00]
-; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_permilps_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
-; SKYLAKE-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
-; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_permilps_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
-; SKX-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_permilps_ymm:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [7:1.00]
-; BDVER2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [2:1.00]
-; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_permilps_ymm:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [6:2.00]
-; BTVER2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_permilps_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:0.50]
-; ZNVER1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.50]
-; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
- %2 = load <8 x float>, <8 x float> *%a1, align 32
- %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
- %4 = fadd <8 x float> %1, %3
- ret <8 x float> %4
-}
-
-define <2 x double> @test_permilvarpd(<2 x double> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_permilvarpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_permilvarpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_permilvarpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_permilvarpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_permilvarpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_permilvarpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_permilvarpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BDVER2-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_permilvarpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BTVER2-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_permilvarpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %a1)
- %2 = load <2 x i64>, <2 x i64> *%a2, align 16
- %3 = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %1, <2 x i64> %2)
- ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone
-
-define <4 x double> @test_permilvarpd_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
-; GENERIC-LABEL: test_permilvarpd_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_permilvarpd_ymm:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_permilvarpd_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_permilvarpd_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_permilvarpd_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_permilvarpd_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SKX-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_permilvarpd_ymm:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
-; BDVER2-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_permilvarpd_ymm:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [4:3.00]
-; BTVER2-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [9:3.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_permilvarpd_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %a1)
- %2 = load <4 x i64>, <4 x i64> *%a2, align 32
- %3 = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %1, <4 x i64> %2)
- ret <4 x double> %3
-}
-declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone
-
-define <4 x float> @test_permilvarps(<4 x float> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_permilvarps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_permilvarps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_permilvarps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_permilvarps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_permilvarps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_permilvarps:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_permilvarps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BDVER2-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_permilvarps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BTVER2-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_permilvarps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %1, <4 x i32> %2)
- ret <4 x float> %3
-}
-declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone
-
-define <8 x float> @test_permilvarps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_permilvarps_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_permilvarps_ymm:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_permilvarps_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_permilvarps_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_permilvarps_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_permilvarps_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SKX-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_permilvarps_ymm:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
-; BDVER2-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_permilvarps_ymm:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [4:3.00]
-; BTVER2-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [9:3.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_permilvarps_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %a1)
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %1, <8 x i32> %2)
- ret <8 x float> %3
-}
-declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone
-
-define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) {
-; GENERIC-LABEL: test_rcpps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vrcpps (%rdi), %ymm1 # sched: [14:2.00]
-; GENERIC-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
-; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_rcpps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps (%rdi), %ymm1 # sched: [14:2.00]
-; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rcpps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: [18:2.00]
-; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rcpps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; BROADWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: [17:2.00]
-; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rcpps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:1.00]
-; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rcpps:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00]
-; SKX-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:1.00]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_rcpps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps (%rdi), %ymm1 # sched: [10:2.00]
-; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_rcpps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vrcpps (%rdi), %ymm1 # sched: [7:2.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_rcpps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vrcpps (%rdi), %ymm1 # sched: [12:0.50]
-; ZNVER1-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:0.50]
-; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %a0)
- %2 = load <8 x float>, <8 x float> *%a1, align 32
- %3 = call <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float> %2)
- %4 = fadd <8 x float> %1, %3
- ret <8 x float> %4
-}
-declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone
-
-define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) {
-; GENERIC-LABEL: test_roundpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00]
-; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_roundpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:1.00]
-; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_roundpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [6:0.50]
-; HASWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [13:2.00]
-; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_roundpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [12:2.00]
-; BROADWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_roundpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:1.00]
-; SKYLAKE-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:1.00]
-; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_roundpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:1.00]
-; SKX-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:1.00]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_roundpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [9:2.00]
-; BDVER2-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [4:2.00]
-; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_roundpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [8:2.00]
-; BTVER2-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_roundpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [11:1.00]
-; ZNVER1-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %a0, i32 7)
- %2 = load <4 x double>, <4 x double> *%a1, align 32
- %3 = call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %2, i32 7)
- %4 = fadd <4 x double> %1, %3
- ret <4 x double> %4
-}
-declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone
-
-define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) {
-; GENERIC-LABEL: test_roundps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [10:1.00]
-; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_roundps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [10:1.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_roundps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [6:0.50]
-; HASWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [13:2.00]
-; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_roundps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [12:2.00]
-; BROADWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_roundps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:1.00]
-; SKYLAKE-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:1.00]
-; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_roundps:
-; SKX: # %bb.0:
-; SKX-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:1.00]
-; SKX-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:1.00]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_roundps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [9:2.00]
-; BDVER2-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [4:2.00]
-; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_roundps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [8:2.00]
-; BTVER2-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_roundps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [11:1.00]
-; ZNVER1-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %a0, i32 7)
- %2 = load <8 x float>, <8 x float> *%a1, align 32
- %3 = call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %2, i32 7)
- %4 = fadd <8 x float> %1, %3
- ret <8 x float> %4
-}
-declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone
-
-define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) {
-; GENERIC-LABEL: test_rsqrtps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:2.00]
-; GENERIC-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:2.00]
-; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_rsqrtps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:2.00]
-; SANDY-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:2.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rsqrtps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [18:2.00]
-; HASWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rsqrtps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [11:2.00]
-; BROADWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [17:2.00]
-; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rsqrtps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:1.00]
-; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rsqrtps:
-; SKX: # %bb.0:
-; SKX-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [4:1.00]
-; SKX-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:1.00]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_rsqrtps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [10:2.00]
-; BDVER2-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_rsqrtps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [7:2.00]
-; BTVER2-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_rsqrtps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [12:0.50]
-; ZNVER1-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %a0)
- %2 = load <8 x float>, <8 x float> *%a1, align 32
- %3 = call <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float> %2)
- %4 = fadd <8 x float> %1, %3
- ret <8 x float> %4
-}
-declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
-
-define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_shufpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
-; GENERIC-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
-; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_shufpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
-; SANDY-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
-; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_shufpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
-; HASWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
-; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_shufpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
-; BROADWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [7:1.00]
-; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_shufpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
-; SKYLAKE-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
-; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_shufpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
-; SKX-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_shufpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [2:1.00]
-; BDVER2-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [7:1.00]
-; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_shufpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00]
-; BTVER2-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [6:2.00]
-; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_shufpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:0.50]
-; ZNVER1-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 4, i32 2, i32 7>
- %2 = load <4 x double>, <4 x double> *%a2, align 32
- %3 = shufflevector <4 x double> %a1, <4 x double> %2, <4 x i32> <i32 1, i32 4, i32 2, i32 7>
- %4 = fadd <4 x double> %1, %3
- ret <4 x double> %4
-}
-
-define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind {
-; GENERIC-LABEL: test_shufps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
-; GENERIC-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00]
-; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_shufps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
-; SANDY-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_shufps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
-; HASWELL-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00]
-; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_shufps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
-; BROADWELL-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [7:1.00]
-; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_shufps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
-; SKYLAKE-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00]
-; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_shufps:
-; SKX: # %bb.0:
-; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
-; SKX-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_shufps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [2:1.00]
-; BDVER2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [7:1.00]
-; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_shufps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00]
-; BTVER2-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [6:2.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_shufps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:0.50]
-; ZNVER1-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:0.50]
-; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 0, i32 8, i32 8, i32 4, i32 4, i32 12, i32 12>
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = shufflevector <8 x float> %a1, <8 x float> %2, <8 x i32> <i32 0, i32 3, i32 8, i32 8, i32 4, i32 7, i32 12, i32 12>
- %4 = fadd <8 x float> %1, %3
- ret <8 x float> %4
-}
-
-define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) {
-; GENERIC-LABEL: test_sqrtpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:44.00]
-; GENERIC-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:44.00]
-; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_sqrtpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:44.00]
-; SANDY-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:44.00]
-; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_sqrtpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [42:28.00]
-; HASWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [35:28.00]
-; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sqrtpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [29:28.00]
-; BROADWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [35:28.00]
-; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sqrtpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:12.00]
-; SKYLAKE-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [25:12.00]
-; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sqrtpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:12.00]
-; SKX-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [25:12.00]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_sqrtpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [14:27.00]
-; BDVER2-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [9:27.00]
-; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_sqrtpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [59:54.00]
-; BTVER2-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [54:54.00]
-; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_sqrtpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [47:40.00]
-; ZNVER1-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [40:40.00]
-; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0)
- %2 = load <4 x double>, <4 x double> *%a1, align 32
- %3 = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %2)
- %4 = fadd <4 x double> %1, %3
- ret <4 x double> %4
-}
-declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
-
-define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) {
-; GENERIC-LABEL: test_sqrtps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:28.00]
-; GENERIC-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:28.00]
-; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_sqrtps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:28.00]
-; SANDY-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:28.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_sqrtps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [28:14.00]
-; HASWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [21:14.00]
-; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sqrtps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [21:14.00]
-; BROADWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [27:14.00]
-; BROADWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sqrtps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:6.00]
-; SKYLAKE-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:6.00]
-; SKYLAKE-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sqrtps:
-; SKX: # %bb.0:
-; SKX-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:6.00]
-; SKX-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:6.00]
-; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_sqrtps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vsqrtps (%rdi), %ymm1 # sched: [14:21.00]
-; BDVER2-NEXT: vsqrtps %ymm0, %ymm0 # sched: [9:21.00]
-; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_sqrtps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vsqrtps (%rdi), %ymm1 # sched: [47:42.00]
-; BTVER2-NEXT: vsqrtps %ymm0, %ymm0 # sched: [42:42.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_sqrtps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vsqrtps (%rdi), %ymm1 # sched: [35:28.00]
-; ZNVER1-NEXT: vsqrtps %ymm0, %ymm0 # sched: [28:28.00]
-; ZNVER1-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0)
- %2 = load <8 x float>, <8 x float> *%a1, align 32
- %3 = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %2)
- %4 = fadd <8 x float> %1, %3
- ret <8 x float> %4
-}
-declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
-
-define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_subpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_subpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_subpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_subpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_subpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_subpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_subpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_subpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_subpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fsub <4 x double> %a0, %a1
- %2 = load <4 x double>, <4 x double> *%a2, align 32
- %3 = fsub <4 x double> %1, %2
- ret <4 x double> %3
-}
-
-define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_subps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_subps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_subps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_subps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_subps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_subps:
-; SKX: # %bb.0:
-; SKX-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_subps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_subps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_subps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fsub <8 x float> %a0, %a1
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = fsub <8 x float> %1, %2
- ret <8 x float> %3
-}
-
-define i32 @test_testpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_testpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: xorl %eax, %eax # sched: [0:0.25]
-; GENERIC-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: setb %al # sched: [1:0.50]
-; GENERIC-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_testpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: xorl %eax, %eax # sched: [0:0.25]
-; SANDY-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: setb %al # sched: [1:0.50]
-; SANDY-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_testpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; HASWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: setb %al # sched: [1:0.50]
-; HASWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_testpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: setb %al # sched: [1:0.50]
-; BROADWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_testpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: vtestpd %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: setb %al # sched: [1:0.50]
-; SKYLAKE-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:1.00]
-; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_testpd:
-; SKX: # %bb.0:
-; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; SKX-NEXT: vtestpd %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: setb %al # sched: [1:0.50]
-; SKX-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:1.00]
-; SKX-NEXT: adcl $0, %eax # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_testpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25]
-; BDVER2-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-NEXT: setb %al # sched: [1:0.50]
-; BDVER2-NEXT: vtestpd (%rdi), %xmm0 # sched: [6:1.00]
-; BDVER2-NEXT: adcl $0, %eax # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_testpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50]
-; BTVER2-NEXT: vtestpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: setb %al # sched: [1:0.50]
-; BTVER2-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: adcl $0, %eax # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_testpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: setb %al # sched: [1:0.25]
-; ZNVER1-NEXT: vtestpd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1)
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %2)
- %4 = add i32 %1, %3
- ret i32 %4
-}
-declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone
-
-define i32 @test_testpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_testpd_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: xorl %eax, %eax # sched: [0:0.25]
-; GENERIC-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: setb %al # sched: [1:0.50]
-; GENERIC-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_testpd_ymm:
-; SANDY: # %bb.0:
-; SANDY-NEXT: xorl %eax, %eax # sched: [0:0.25]
-; SANDY-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: setb %al # sched: [1:0.50]
-; SANDY-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00]
-; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67]
-; SANDY-NEXT: vzeroupper # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_testpd_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; HASWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: setb %al # sched: [1:0.50]
-; HASWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50]
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_testpd_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: setb %al # sched: [1:0.50]
-; BROADWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50]
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_testpd_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: vtestpd %ymm1, %ymm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: setb %al # sched: [1:0.50]
-; SKYLAKE-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:1.00]
-; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_testpd_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; SKX-NEXT: vtestpd %ymm1, %ymm0 # sched: [2:1.00]
-; SKX-NEXT: setb %al # sched: [1:0.50]
-; SKX-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:1.00]
-; SKX-NEXT: adcl $0, %eax # sched: [1:0.50]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_testpd_ymm:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25]
-; BDVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00]
-; BDVER2-NEXT: setb %al # sched: [1:0.50]
-; BDVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [6:1.00]
-; BDVER2-NEXT: adcl $0, %eax # sched: [1:1.00]
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_testpd_ymm:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50]
-; BTVER2-NEXT: vtestpd %ymm1, %ymm0 # sched: [4:2.00]
-; BTVER2-NEXT: setb %al # sched: [1:0.50]
-; BTVER2-NEXT: vtestpd (%rdi), %ymm0 # sched: [9:2.00]
-; BTVER2-NEXT: adcl $0, %eax # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_testpd_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: setb %al # sched: [1:0.25]
-; ZNVER1-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1)
- %2 = load <4 x double>, <4 x double> *%a2, align 32
- %3 = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %2)
- %4 = add i32 %1, %3
- ret i32 %4
-}
-declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone
-
-define i32 @test_testps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_testps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: xorl %eax, %eax # sched: [0:0.25]
-; GENERIC-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: setb %al # sched: [1:0.50]
-; GENERIC-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_testps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: xorl %eax, %eax # sched: [0:0.25]
-; SANDY-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: setb %al # sched: [1:0.50]
-; SANDY-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_testps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; HASWELL-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: setb %al # sched: [1:0.50]
-; HASWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_testps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: setb %al # sched: [1:0.50]
-; BROADWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_testps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: vtestps %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: setb %al # sched: [1:0.50]
-; SKYLAKE-NEXT: vtestps (%rdi), %xmm0 # sched: [8:1.00]
-; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_testps:
-; SKX: # %bb.0:
-; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; SKX-NEXT: vtestps %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: setb %al # sched: [1:0.50]
-; SKX-NEXT: vtestps (%rdi), %xmm0 # sched: [8:1.00]
-; SKX-NEXT: adcl $0, %eax # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_testps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25]
-; BDVER2-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-NEXT: setb %al # sched: [1:0.50]
-; BDVER2-NEXT: vtestps (%rdi), %xmm0 # sched: [6:1.00]
-; BDVER2-NEXT: adcl $0, %eax # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_testps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50]
-; BTVER2-NEXT: vtestps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: setb %al # sched: [1:0.50]
-; BTVER2-NEXT: vtestps (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: adcl $0, %eax # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_testps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: vtestps %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: setb %al # sched: [1:0.25]
-; ZNVER1-NEXT: vtestps (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1)
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %2)
- %4 = add i32 %1, %3
- ret i32 %4
-}
-declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
-
-define i32 @test_testps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_testps_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: xorl %eax, %eax # sched: [0:0.25]
-; GENERIC-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: setb %al # sched: [1:0.50]
-; GENERIC-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: adcl $0, %eax # sched: [2:0.67]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_testps_ymm:
-; SANDY: # %bb.0:
-; SANDY-NEXT: xorl %eax, %eax # sched: [0:0.25]
-; SANDY-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: setb %al # sched: [1:0.50]
-; SANDY-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00]
-; SANDY-NEXT: adcl $0, %eax # sched: [2:0.67]
-; SANDY-NEXT: vzeroupper # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_testps_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; HASWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: setb %al # sched: [1:0.50]
-; HASWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50]
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_testps_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: setb %al # sched: [1:0.50]
-; BROADWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: adcl $0, %eax # sched: [1:0.50]
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_testps_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: vtestps %ymm1, %ymm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: setb %al # sched: [1:0.50]
-; SKYLAKE-NEXT: vtestps (%rdi), %ymm0 # sched: [9:1.00]
-; SKYLAKE-NEXT: adcl $0, %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_testps_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; SKX-NEXT: vtestps %ymm1, %ymm0 # sched: [2:1.00]
-; SKX-NEXT: setb %al # sched: [1:0.50]
-; SKX-NEXT: vtestps (%rdi), %ymm0 # sched: [9:1.00]
-; SKX-NEXT: adcl $0, %eax # sched: [1:0.50]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_testps_ymm:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: xorl %eax, %eax # sched: [0:0.25]
-; BDVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00]
-; BDVER2-NEXT: setb %al # sched: [1:0.50]
-; BDVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [6:1.00]
-; BDVER2-NEXT: adcl $0, %eax # sched: [1:1.00]
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_testps_ymm:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: xorl %eax, %eax # sched: [0:0.50]
-; BTVER2-NEXT: vtestps %ymm1, %ymm0 # sched: [4:2.00]
-; BTVER2-NEXT: setb %al # sched: [1:0.50]
-; BTVER2-NEXT: vtestps (%rdi), %ymm0 # sched: [9:2.00]
-; BTVER2-NEXT: adcl $0, %eax # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_testps_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: vtestps %ymm1, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: setb %al # sched: [1:0.25]
-; ZNVER1-NEXT: vtestps (%rdi), %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: adcl $0, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1)
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %2)
- %4 = add i32 %1, %3
- ret i32 %4
-}
-declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone
-
-define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_unpckhpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
-; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_unpckhpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; SANDY-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
-; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_unpckhpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; HASWELL-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
-; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_unpckhpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; BROADWELL-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [7:1.00]
-; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_unpckhpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; SKYLAKE-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
-; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_unpckhpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_unpckhpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [2:1.00]
-; BDVER2-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [7:1.00]
-; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_unpckhpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; BTVER2-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [6:2.00]
-; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_unpckhpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.50]
-; ZNVER1-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %2 = load <4 x double>, <4 x double> *%a2, align 32
- %3 = shufflevector <4 x double> %a1, <4 x double> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %4 = fadd <4 x double> %1, %3
- ret <4 x double> %4
-}
-
-define <8 x float> @test_unpckhps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind {
-; GENERIC-LABEL: test_unpckhps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_unpckhps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_unpckhps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_unpckhps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; BROADWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_unpckhps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; SKYLAKE-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_unpckhps:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_unpckhps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [2:1.00]
-; BDVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_unpckhps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; BTVER2-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [6:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_unpckhps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.50]
-; ZNVER1-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = shufflevector <8 x float> %1, <8 x float> %2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- ret <8 x float> %3
-}
-
-define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_unpcklpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
-; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_unpcklpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; SANDY-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
-; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_unpcklpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; HASWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
-; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_unpcklpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; BROADWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [7:1.00]
-; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_unpcklpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; SKYLAKE-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
-; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_unpcklpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_unpcklpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [2:1.00]
-; BDVER2-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [7:1.00]
-; BDVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_unpcklpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; BTVER2-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [6:2.00]
-; BTVER2-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_unpcklpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.50]
-; ZNVER1-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %2 = load <4 x double>, <4 x double> *%a2, align 32
- %3 = shufflevector <4 x double> %a1, <4 x double> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %4 = fadd <4 x double> %1, %3
- ret <4 x double> %4
-}
-
-define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind {
-; GENERIC-LABEL: test_unpcklps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_unpcklps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_unpcklps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_unpcklps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; BROADWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_unpcklps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; SKYLAKE-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_unpcklps:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_unpcklps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [2:1.00]
-; BDVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_unpcklps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; BTVER2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [6:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_unpcklps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.50]
-; ZNVER1-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = shufflevector <8 x float> %1, <8 x float> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- ret <8 x float> %3
-}
-
-define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) {
-; GENERIC-LABEL: test_xorpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_xorpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_xorpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xorpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xorpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xorpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_xorpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BDVER2-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BDVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xorpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
-; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xorpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast <4 x double> %a0 to <4 x i64>
- %2 = bitcast <4 x double> %a1 to <4 x i64>
- %3 = xor <4 x i64> %1, %2
- %4 = load <4 x double>, <4 x double> *%a2, align 32
- %5 = bitcast <4 x double> %4 to <4 x i64>
- %6 = xor <4 x i64> %3, %5
- %7 = bitcast <4 x i64> %6 to <4 x double>
- %8 = fadd <4 x double> %a1, %7
- ret <4 x double> %8
-}
-
-define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_xorps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_xorps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_xorps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xorps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xorps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xorps:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_xorps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; BDVER2-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xorps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BTVER2-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [6:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xorps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast <8 x float> %a0 to <4 x i64>
- %2 = bitcast <8 x float> %a1 to <4 x i64>
- %3 = xor <4 x i64> %1, %2
- %4 = load <8 x float>, <8 x float> *%a2, align 32
- %5 = bitcast <8 x float> %4 to <4 x i64>
- %6 = xor <4 x i64> %3, %5
- %7 = bitcast <4 x i64> %6 to <8 x float>
- %8 = fadd <8 x float> %a1, %7
- ret <8 x float> %8
-}
-
-define void @test_zeroall() {
-; GENERIC-LABEL: test_zeroall:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vzeroall # sched: [9:2.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_zeroall:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vzeroall # sched: [9:2.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_zeroall:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vzeroall # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_zeroall:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vzeroall # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_zeroall:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vzeroall # sched: [16:4.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_zeroall:
-; SKX: # %bb.0:
-; SKX-NEXT: vzeroall # sched: [12:5.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_zeroall:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vzeroall # sched: [90:8.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_zeroall:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vzeroall # sched: [90:36.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_zeroall:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vzeroall # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void @llvm.x86.avx.vzeroall()
- ret void
-}
-declare void @llvm.x86.avx.vzeroall() nounwind
-
-define void @test_zeroupper() {
-; GENERIC-LABEL: test_zeroupper:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_zeroupper:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vzeroupper # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_zeroupper:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_zeroupper:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_zeroupper:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_zeroupper:
-; SKX: # %bb.0:
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_zeroupper:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_zeroupper:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vzeroupper # sched: [46:18.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_zeroupper:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void @llvm.x86.avx.vzeroupper()
- ret void
-}
-
-define void @test_avx256_zero_idioms() {
-; GENERIC-LABEL: test_avx256_zero_idioms:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:1.00]
-; GENERIC-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_avx256_zero_idioms:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:1.00]
-; SANDY-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:1.00]
-; SANDY-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:1.00]
-; SANDY-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_avx256_zero_idioms:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:1.00]
-; HASWELL-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:1.00]
-; HASWELL-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_avx256_zero_idioms:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:1.00]
-; BROADWELL-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:1.00]
-; BROADWELL-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_avx256_zero_idioms:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:0.33]
-; SKYLAKE-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:0.33]
-; SKYLAKE-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:0.33]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_avx256_zero_idioms:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:0.33]
-; SKX-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:0.33]
-; SKX-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:0.33]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_avx256_zero_idioms:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [2:1.00]
-; BDVER2-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [2:1.00]
-; BDVER2-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [2:1.00]
-; BDVER2-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [2:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_avx256_zero_idioms:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:0.50]
-; BTVER2-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
-; BTVER2-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:0.50]
-; BTVER2-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_avx256_zero_idioms:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vxorps %ymm0, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vxorpd %ymm1, %ymm1, %ymm1 # sched: [1:0.25]
-; ZNVER1-NEXT: vandnps %ymm2, %ymm2, %ymm2 # sched: [1:0.25]
-; ZNVER1-NEXT: vandnpd %ymm3, %ymm3, %ymm3 # sched: [1:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "vxorps %ymm0, %ymm0, %ymm0\0Avxorpd %ymm1, %ymm1, %ymm1\0Avandnps %ymm2, %ymm2, %ymm2\0Avandnpd %ymm3, %ymm3, %ymm3", ""()
- ret void
-}
-declare void @llvm.x86.avx.vzeroupper() nounwind
-
-!0 = !{i32 1}
diff --git a/llvm/test/CodeGen/X86/avx2-schedule.ll b/llvm/test/CodeGen/X86/avx2-schedule.ll
deleted file mode 100644
index 00c4fcb114f..00000000000
--- a/llvm/test/CodeGen/X86/avx2-schedule.ll
+++ /dev/null
@@ -1,7111 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) {
-; GENERIC-LABEL: test_broadcasti128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:1.00]
-; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_broadcasti128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
-; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_broadcasti128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [6:0.50]
-; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_broadcasti128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
-; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_broadcasti128:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [7:0.50]
-; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_broadcasti128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [8:0.50]
-; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <4 x i32>, <4 x i32> *%a1, align 16
- %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
- %3 = add <8 x i32> %2, %a0
- ret <8 x i32> %3
-}
-
-define <4 x double> @test_broadcastsd_ymm(<2 x double> %a0) {
-; GENERIC-LABEL: test_broadcastsd_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_broadcastsd_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_broadcastsd_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_broadcastsd_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_broadcastsd_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_broadcastsd_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <2 x double> %a0, <2 x double> undef, <4 x i32> zeroinitializer
- %2 = fadd <4 x double> %1, %1
- ret <4 x double> %2
-}
-
-define <4 x float> @test_broadcastss(<4 x float> %a0) {
-; GENERIC-LABEL: test_broadcastss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_broadcastss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_broadcastss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_broadcastss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_broadcastss:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_broadcastss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> zeroinitializer
- %2 = fadd <4 x float> %1, %1
- ret <4 x float> %2
-}
-
-define <8 x float> @test_broadcastss_ymm(<4 x float> %a0) {
-; GENERIC-LABEL: test_broadcastss_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_broadcastss_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_broadcastss_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_broadcastss_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_broadcastss_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_broadcastss_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x float> %a0, <4 x float> undef, <8 x i32> zeroinitializer
- %2 = fadd <8 x float> %1, %1
- ret <8 x float> %2
-}
-
-define <4 x i32> @test_extracti128(<8 x i16> %a0, <4 x i32> *%a1) {
-; GENERIC-LABEL: test_extracti128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; GENERIC-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_extracti128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
-; HASWELL-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_extracti128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
-; BROADWELL-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_extracti128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
-; SKYLAKE-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_extracti128:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
-; SKX-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [3:1.00]
-; SKX-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_extracti128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
-; ZNVER1-NEXT: vextracti128 $1, %ymm1, %xmm0 # sched: [2:0.25]
-; ZNVER1-NEXT: vextracti128 $1, %ymm1, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %z = zext <8 x i16> %a0 to <8 x i32>
- %ext = shufflevector <8 x i32> %z, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
- store <4 x i32> %ext, <4 x i32> *%a1
- ret <4 x i32> %ext
-}
-
-define <2 x double> @test_gatherdpd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3) {
-; GENERIC-LABEL: test_gatherdpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_gatherdpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_gatherdpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_gatherdpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_gatherdpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_gatherdpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3, i8 2)
- ret <2 x double> %1
-}
-declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*, <4 x i32>, <2 x double>, i8) nounwind readonly
-
-define <4 x double> @test_gatherdpd_ymm(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3) {
-; GENERIC-LABEL: test_gatherdpd_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_gatherdpd_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [27:4.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_gatherdpd_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [26:5.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_gatherdpd_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_gatherdpd_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [25:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_gatherdpd_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3, i8 8)
- ret <4 x double> %1
-}
-declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*, <4 x i32>, <4 x double>, i8) nounwind readonly
-
-define <4 x float> @test_gatherdps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3) {
-; GENERIC-LABEL: test_gatherdps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_gatherdps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.67]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_gatherdps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_gatherdps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_gatherdps:
-; SKX: # %bb.0:
-; SKX-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_gatherdps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3, i8 2)
- ret <4 x float> %1
-}
-declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*, <4 x i32>, <4 x float>, i8) nounwind readonly
-
-define <8 x float> @test_gatherdps_ymm(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3) {
-; GENERIC-LABEL: test_gatherdps_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_gatherdps_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [27:6.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_gatherdps_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [26:4.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_gatherdps_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_gatherdps_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [25:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_gatherdps_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3, i8 4)
- ret <8 x float> %1
-}
-declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*, <8 x i32>, <8 x float>, i8) nounwind readonly
-
-define <2 x double> @test_gatherqpd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3) {
-; GENERIC-LABEL: test_gatherqpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_gatherqpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [23:3.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_gatherqpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:3.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_gatherqpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_gatherqpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_gatherqpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3, i8 2)
- ret <2 x double> %1
-}
-declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*, <2 x i64>, <2 x double>, i8) nounwind readonly
-
-define <4 x double> @test_gatherqpd_ymm(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3) {
-; GENERIC-LABEL: test_gatherqpd_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_gatherqpd_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [24:5.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_gatherqpd_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [23:3.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_gatherqpd_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_gatherqpd_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [25:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_gatherqpd_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3, i8 8)
- ret <4 x double> %1
-}
-declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*, <4 x i64>, <4 x double>, i8) nounwind readonly
-
-define <4 x float> @test_gatherqps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3) {
-; GENERIC-LABEL: test_gatherqps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_gatherqps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:3.67]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_gatherqps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [27:5.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_gatherqps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_gatherqps:
-; SKX: # %bb.0:
-; SKX-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_gatherqps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3, i8 2)
- ret <4 x float> %1
-}
-declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*, <2 x i64>, <4 x float>, i8) nounwind readonly
-
-define <4 x float> @test_gatherqps_ymm(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3) {
-; GENERIC-LABEL: test_gatherqps_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_gatherqps_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [28:3.67]
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_gatherqps_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [24:5.00]
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_gatherqps_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00]
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_gatherqps_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [25:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_gatherqps_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3, i8 4)
- ret <4 x float> %1
-}
-declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*, <4 x i64>, <4 x float>, i8) nounwind readonly
-
-define <8 x i32> @test_inserti128(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_inserti128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_inserti128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
-; HASWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_inserti128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
-; BROADWELL-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_inserti128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
-; SKYLAKE-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_inserti128:
-; SKX: # %bb.0:
-; SKX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00]
-; SKX-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_inserti128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 # sched: [2:0.25]
-; ZNVER1-NEXT: vinserti128 $1, (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
-; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x i32> %a1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
- %2 = shufflevector <8 x i32> %a0, <8 x i32> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
- %3 = load <4 x i32>, <4 x i32> *%a2, align 16
- %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
- %5 = shufflevector <8 x i32> %a0, <8 x i32> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
- %6 = add <8 x i32> %2, %5
- ret <8 x i32> %6
-}
-
-define <4 x i64> @test_movntdqa(i8* %a0) {
-; GENERIC-LABEL: test_movntdqa:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movntdqa:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movntdqa:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movntdqa:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movntdqa:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_movntdqa:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovntdqa (%rdi), %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i64> @llvm.x86.avx2.movntdqa(i8* %a0)
- ret <4 x i64> %1
-}
-declare <4 x i64> @llvm.x86.avx2.movntdqa(i8*) nounwind readonly
-
-define <16 x i16> @test_mpsadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_mpsadbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:1.00]
-; GENERIC-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [14:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_mpsadbw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00]
-; HASWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [14:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mpsadbw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [7:2.00]
-; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mpsadbw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
-; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_mpsadbw:
-; SKX: # %bb.0:
-; SKX-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [4:2.00]
-; SKX-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [11:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_mpsadbw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmpsadbw $7, %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vmpsadbw $7, (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7)
- %2 = bitcast <16 x i16> %1 to <32 x i8>
- %3 = load <32 x i8>, <32 x i8> *%a2, align 32
- %4 = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %2, <32 x i8> %3, i8 7)
- ret <16 x i16> %4
-}
-declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
-
-define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) {
-; GENERIC-LABEL: test_pabsb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
-; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pabsb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
-; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pabsb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [7:0.50]
-; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pabsb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pabsb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
-; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pabsb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpabsb (%rdi), %ymm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0)
- %2 = load <32 x i8>, <32 x i8> *%a1, align 32
- %3 = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %2)
- %4 = or <32 x i8> %1, %3
- ret <32 x i8> %4
-}
-declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
-
-define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) {
-; GENERIC-LABEL: test_pabsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
-; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pabsd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
-; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pabsd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:0.50]
-; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pabsd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pabsd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
-; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pabsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpabsd (%rdi), %ymm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0)
- %2 = load <8 x i32>, <8 x i32> *%a1, align 32
- %3 = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %2)
- %4 = or <8 x i32> %1, %3
- ret <8 x i32> %4
-}
-declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
-
-define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) {
-; GENERIC-LABEL: test_pabsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
-; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pabsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
-; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pabsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:0.50]
-; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pabsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pabsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
-; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pabsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpabsw (%rdi), %ymm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0)
- %2 = load <16 x i16>, <16 x i16> *%a1, align 32
- %3 = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %2)
- %4 = or <16 x i16> %1, %3
- ret <16 x i16> %4
-}
-declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
-
-define <16 x i16> @test_packssdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_packssdw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_packssdw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_packssdw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_packssdw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_packssdw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SKX-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_packssdw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpackssdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpackssdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1)
- %2 = bitcast <16 x i16> %1 to <8 x i32>
- %3 = load <8 x i32>, <8 x i32> *%a2, align 32
- %4 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %2, <8 x i32> %3)
- ret <16 x i16> %4
-}
-declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
-
-define <32 x i8> @test_packsswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_packsswb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_packsswb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_packsswb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_packsswb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_packsswb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SKX-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_packsswb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpacksswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpacksswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1)
- %2 = bitcast <32 x i8> %1 to <16 x i16>
- %3 = load <16 x i16>, <16 x i16> *%a2, align 32
- %4 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %2, <16 x i16> %3)
- ret <32 x i8> %4
-}
-declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <16 x i16> @test_packusdw(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_packusdw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_packusdw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_packusdw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_packusdw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_packusdw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SKX-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_packusdw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpackusdw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1)
- %2 = bitcast <16 x i16> %1 to <8 x i32>
- %3 = load <8 x i32>, <8 x i32> *%a2, align 32
- %4 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %2, <8 x i32> %3)
- ret <16 x i16> %4
-}
-declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
-
-define <32 x i8> @test_packuswb(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_packuswb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_packuswb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_packuswb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_packuswb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_packuswb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SKX-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_packuswb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpackuswb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1)
- %2 = bitcast <32 x i8> %1 to <16 x i16>
- %3 = load <16 x i16>, <16 x i16> *%a2, align 32
- %4 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %2, <16 x i16> %3)
- ret <32 x i8> %4
-}
-declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_paddb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_paddb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_paddb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = add <32 x i8> %a0, %a1
- %2 = load <32 x i8>, <32 x i8> *%a2, align 32
- %3 = add <32 x i8> %1, %2
- ret <32 x i8> %3
-}
-
-define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_paddd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_paddd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_paddd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = add <8 x i32> %a0, %a1
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = add <8 x i32> %1, %2
- ret <8 x i32> %3
-}
-
-define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
-; GENERIC-LABEL: test_paddq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_paddq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_paddq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = add <4 x i64> %a0, %a1
- %2 = load <4 x i64>, <4 x i64> *%a2, align 32
- %3 = add <4 x i64> %1, %2
- ret <4 x i64> %3
-}
-
-define <32 x i8> @test_paddsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_paddsb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_paddsb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddsb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddsb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddsb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_paddsb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1)
- %2 = load <32 x i8>, <32 x i8> *%a2, align 32
- %3 = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %1, <32 x i8> %2)
- ret <32 x i8> %3
-}
-declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
-
-define <16 x i16> @test_paddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_paddsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_paddsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_paddsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1)
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %1, <16 x i16> %2)
- ret <16 x i16> %3
-}
-declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <32 x i8> @test_paddusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_paddusb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_paddusb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddusb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddusb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddusb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_paddusb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1)
- %2 = load <32 x i8>, <32 x i8> *%a2, align 32
- %3 = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %1, <32 x i8> %2)
- ret <32 x i8> %3
-}
-declare <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
-
-define <16 x i16> @test_paddusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_paddusw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_paddusw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddusw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddusw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddusw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_paddusw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1)
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %1, <16 x i16> %2)
- ret <16 x i16> %3
-}
-declare <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_paddw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_paddw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_paddw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = add <16 x i16> %a0, %a1
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = add <16 x i16> %1, %2
- ret <16 x i16> %3
-}
-
-define <32 x i8> @test_palignr(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_palignr:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
-; GENERIC-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
-; GENERIC-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_palignr:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
-; HASWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
-; HASWELL-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_palignr:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
-; BROADWELL-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
-; BROADWELL-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_palignr:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
-; SKYLAKE-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
-; SKYLAKE-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_palignr:
-; SKX: # %bb.0:
-; SKX-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:1.00]
-; SKX-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:1.00]
-; SKX-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_palignr:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpalignr {{.*#+}} ymm1 = ymm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0],ymm1[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16] sched: [1:0.25]
-; ZNVER1-NEXT: vpalignr {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0],ymm0[17,18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16] sched: [1:0.25]
-; ZNVER1-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <32 x i8> %a1, <32 x i8> %a0, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
- %2 = load <32 x i8>, <32 x i8> *%a2, align 32
- %3 = shufflevector <32 x i8> %a0, <32 x i8> %1, <32 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48>
- %4 = add <32 x i8> %1, %3
- ret <32 x i8> %4
-}
-
-define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
-; GENERIC-LABEL: test_pand:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pand:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pand:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; BROADWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pand:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pand:
-; SKX: # %bb.0:
-; SKX-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pand:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = and <4 x i64> %a0, %a1
- %2 = load <4 x i64>, <4 x i64> *%a2, align 32
- %3 = and <4 x i64> %1, %2
- %4 = add <4 x i64> %3, %a1
- ret <4 x i64> %4
-}
-
-define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
-; GENERIC-LABEL: test_pandn:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pandn:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
-; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pandn:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; BROADWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [7:0.50]
-; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pandn:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pandn:
-; SKX: # %bb.0:
-; SKX-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
-; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pandn:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = xor <4 x i64> %a0, <i64 -1, i64 -1, i64 -1, i64 -1>
- %2 = and <4 x i64> %a1, %1
- %3 = load <4 x i64>, <4 x i64> *%a2, align 32
- %4 = xor <4 x i64> %2, <i64 -1, i64 -1, i64 -1, i64 -1>
- %5 = and <4 x i64> %3, %4
- %6 = add <4 x i64> %2, %5
- ret <4 x i64> %6
-}
-
-define <32 x i8> @test_pavgb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_pavgb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pavgb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pavgb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pavgb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pavgb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pavgb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpavgb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpavgb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = zext <32 x i8> %a0 to <32 x i16>
- %2 = zext <32 x i8> %a1 to <32 x i16>
- %3 = add <32 x i16> %1, %2
- %4 = add <32 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %5 = lshr <32 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %6 = trunc <32 x i16> %5 to <32 x i8>
- %7 = load <32 x i8>, <32 x i8> *%a2, align 32
- %8 = zext <32 x i8> %6 to <32 x i16>
- %9 = zext <32 x i8> %7 to <32 x i16>
- %10 = add <32 x i16> %8, %9
- %11 = add <32 x i16> %10, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %12 = lshr <32 x i16> %11, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %13 = trunc <32 x i16> %12 to <32 x i8>
- ret <32 x i8> %13
-}
-
-define <16 x i16> @test_pavgw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_pavgw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pavgw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pavgw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pavgw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pavgw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pavgw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpavgw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpavgw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = zext <16 x i16> %a0 to <16 x i32>
- %2 = zext <16 x i16> %a1 to <16 x i32>
- %3 = add <16 x i32> %1, %2
- %4 = add <16 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %5 = lshr <16 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %6 = trunc <16 x i32> %5 to <16 x i16>
- %7 = load <16 x i16>, <16 x i16> *%a2, align 32
- %8 = zext <16 x i16> %6 to <16 x i32>
- %9 = zext <16 x i16> %7 to <16 x i32>
- %10 = add <16 x i32> %8, %9
- %11 = add <16 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %12 = lshr <16 x i32> %11, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %13 = trunc <16 x i32> %12 to <16 x i16>
- ret <16 x i16> %13
-}
-
-define <4 x i32> @test_pblendd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_pblendd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
-; GENERIC-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
-; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pblendd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
-; HASWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
-; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pblendd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
-; BROADWELL-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [6:0.50]
-; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pblendd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
-; SKYLAKE-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
-; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pblendd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.33]
-; SKX-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [7:0.50]
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pblendd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3] sched: [1:0.50]
-; ZNVER1-NEXT: vpblendd {{.*#+}} xmm1 = mem[0],xmm1[1],mem[2],xmm1[3] sched: [8:1.00]
-; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
- %4 = add <4 x i32> %1, %3
- ret <4 x i32> %4
-}
-
-define <8 x i32> @test_pblendd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_pblendd_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
-; GENERIC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pblendd_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
-; HASWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
-; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pblendd_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
-; BROADWELL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [7:0.50]
-; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pblendd_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
-; SKYLAKE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
-; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pblendd_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.33]
-; SKX-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [8:0.50]
-; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pblendd_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6],ymm1[7] sched: [1:0.50]
-; ZNVER1-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],mem[1,2],ymm1[3,4,5,6,7] sched: [9:1.50]
-; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 15>
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = shufflevector <8 x i32> %a1, <8 x i32> %2, <8 x i32> <i32 0, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
- %4 = add <8 x i32> %1, %3
- ret <8 x i32> %4
-}
-
-define <32 x i8> @test_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2, <32 x i8> *%a3, <32 x i8> %a4) {
-; GENERIC-LABEL: test_pblendvb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00]
-; GENERIC-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pblendvb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; HASWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pblendvb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BROADWELL-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pblendvb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
-; SKYLAKE-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pblendvb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.67]
-; SKX-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pblendvb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; ZNVER1-NEXT: vpblendvb %ymm3, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2)
- %2 = load <32 x i8>, <32 x i8> *%a3, align 32
- %3 = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %1, <32 x i8> %2, <32 x i8> %a4)
- ret <32 x i8> %3
-}
-declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone
-
-define <16 x i16> @test_pblendw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_pblendw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:0.50]
-; GENERIC-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:0.50]
-; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pblendw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
-; HASWELL-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00]
-; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pblendw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
-; BROADWELL-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [7:1.00]
-; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pblendw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
-; SKYLAKE-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00]
-; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pblendw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [1:1.00]
-; SKX-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [8:1.00]
-; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pblendw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4],ymm0[5,6,7,8,9],ymm1[10,11,12],ymm0[13,14,15] sched: [2:0.33]
-; ZNVER1-NEXT: vpblendw {{.*#+}} ymm1 = mem[0],ymm1[1],mem[2],ymm1[3],mem[4],ymm1[5],mem[6],ymm1[7],mem[8],ymm1[9],mem[10],ymm1[11],mem[12],ymm1[13],mem[14],ymm1[15] sched: [9:0.50]
-; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 5, i32 6, i32 7, i32 8, i32 9, i32 26, i32 27, i32 28, i32 13, i32 14, i32 15>
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = shufflevector <16 x i16> %a1, <16 x i16> %2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
- %4 = add <16 x i16> %1, %3
- ret <16 x i16> %4
-}
-
-define <16 x i8> @test_pbroadcastb(<16 x i8> %a0, <16 x i8> *%a1) {
-; GENERIC-LABEL: test_pbroadcastb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:0.50]
-; GENERIC-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pbroadcastb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00]
-; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pbroadcastb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [9:1.00]
-; BROADWELL-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pbroadcastb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00]
-; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pbroadcastb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [3:1.00]
-; SKX-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [7:1.00]
-; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pbroadcastb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [8:1.00]
-; ZNVER1-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> zeroinitializer
- %2 = load <16 x i8>, <16 x i8> *%a1, align 16
- %3 = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
- %4 = add <16 x i8> %1, %3
- ret <16 x i8> %4
-}
-
-define <32 x i8> @test_pbroadcastb_ymm(<32 x i8> %a0, <32 x i8> *%a1) {
-; GENERIC-LABEL: test_pbroadcastb_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [7:0.50]
-; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pbroadcastb_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00]
-; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pbroadcastb_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [9:1.00]
-; BROADWELL-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pbroadcastb_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00]
-; SKYLAKE-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pbroadcastb_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:1.00]
-; SKX-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pbroadcastb_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [8:2.00]
-; ZNVER1-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [2:0.25]
-; ZNVER1-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> zeroinitializer
- %2 = load <32 x i8>, <32 x i8> *%a1, align 32
- %3 = shufflevector <32 x i8> %2, <32 x i8> undef, <32 x i32> zeroinitializer
- %4 = add <32 x i8> %1, %3
- ret <32 x i8> %4
-}
-
-define <4 x i32> @test_pbroadcastd(<4 x i32> %a0, <4 x i32> *%a1) {
-; GENERIC-LABEL: test_pbroadcastd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [7:0.50]
-; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pbroadcastd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50]
-; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pbroadcastd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [5:0.50]
-; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pbroadcastd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50]
-; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pbroadcastd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [6:0.50]
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pbroadcastd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> zeroinitializer
- %2 = load <4 x i32>, <4 x i32> *%a1, align 16
- %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> zeroinitializer
- %4 = add <4 x i32> %1, %3
- ret <4 x i32> %4
-}
-
-define <8 x i32> @test_pbroadcastd_ymm(<8 x i32> %a0, <8 x i32> *%a1) {
-; GENERIC-LABEL: test_pbroadcastd_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pbroadcastd_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
-; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pbroadcastd_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [6:0.50]
-; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pbroadcastd_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pbroadcastd_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [7:0.50]
-; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pbroadcastd_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [2:0.25]
-; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer
- %2 = load <8 x i32>, <8 x i32> *%a1, align 32
- %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> zeroinitializer
- %4 = add <8 x i32> %1, %3
- ret <8 x i32> %4
-}
-
-define <2 x i64> @test_pbroadcastq(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_pbroadcastq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [7:0.50]
-; GENERIC-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pbroadcastq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50]
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pbroadcastq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [5:0.50]
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pbroadcastq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50]
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pbroadcastq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [6:0.50]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pbroadcastq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer
- %2 = load <2 x i64>, <2 x i64> *%a1, align 16
- %3 = shufflevector <2 x i64> %2, <2 x i64> undef, <2 x i32> zeroinitializer
- %4 = add <2 x i64> %1, %3
- ret <2 x i64> %4
-}
-
-define <4 x i64> @test_pbroadcastq_ymm(<4 x i64> %a0, <4 x i64> *%a1) {
-; GENERIC-LABEL: test_pbroadcastq_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pbroadcastq_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
-; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pbroadcastq_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [6:0.50]
-; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pbroadcastq_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pbroadcastq_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [7:0.50]
-; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pbroadcastq_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [2:0.25]
-; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> zeroinitializer
- %2 = load <4 x i64>, <4 x i64> *%a1, align 32
- %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> zeroinitializer
- %4 = add <4 x i64> %1, %3
- ret <4 x i64> %4
-}
-
-define <8 x i16> @test_pbroadcastw(<8 x i16> %a0, <8 x i16> *%a1) {
-; GENERIC-LABEL: test_pbroadcastw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:0.50]
-; GENERIC-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pbroadcastw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00]
-; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pbroadcastw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [9:1.00]
-; BROADWELL-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pbroadcastw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00]
-; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pbroadcastw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [3:1.00]
-; SKX-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [7:1.00]
-; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pbroadcastw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [8:1.00]
-; ZNVER1-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer
- %2 = load <8 x i16>, <8 x i16> *%a1, align 16
- %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> zeroinitializer
- %4 = add <8 x i16> %1, %3
- ret <8 x i16> %4
-}
-
-define <16 x i16> @test_pbroadcastw_ymm(<16 x i16> %a0, <16 x i16> *%a1) {
-; GENERIC-LABEL: test_pbroadcastw_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [7:0.50]
-; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pbroadcastw_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00]
-; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pbroadcastw_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [9:1.00]
-; BROADWELL-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pbroadcastw_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00]
-; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pbroadcastw_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:1.00]
-; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pbroadcastw_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [8:2.00]
-; ZNVER1-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [2:0.25]
-; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> zeroinitializer
- %2 = load <16 x i16>, <16 x i16> *%a1, align 32
- %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> zeroinitializer
- %4 = add <16 x i16> %1, %3
- ret <16 x i16> %4
-}
-
-define <32 x i8> @test_pcmpeqb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_pcmpeqb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pcmpeqb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpeqb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpeqb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpeqb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pcmpeqb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = icmp eq <32 x i8> %a0, %a1
- %2 = sext <32 x i1> %1 to <32 x i8>
- %3 = load <32 x i8>, <32 x i8> *%a2, align 32
- %4 = icmp eq <32 x i8> %2, %3
- %5 = sext <32 x i1> %4 to <32 x i8>
- ret <32 x i8> %5
-}
-
-define <8 x i32> @test_pcmpeqd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_pcmpeqd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pcmpeqd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpeqd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpeqd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpeqd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pcmpeqd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = icmp eq <8 x i32> %a0, %a1
- %2 = sext <8 x i1> %1 to <8 x i32>
- %3 = load <8 x i32>, <8 x i32> *%a2, align 32
- %4 = icmp eq <8 x i32> %2, %3
- %5 = sext <8 x i1> %4 to <8 x i32>
- ret <8 x i32> %5
-}
-
-define <4 x i64> @test_pcmpeqq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
-; GENERIC-LABEL: test_pcmpeqq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pcmpeqq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpeqq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpeqq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpeqq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pcmpeqq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = icmp eq <4 x i64> %a0, %a1
- %2 = sext <4 x i1> %1 to <4 x i64>
- %3 = load <4 x i64>, <4 x i64> *%a2, align 32
- %4 = icmp eq <4 x i64> %2, %3
- %5 = sext <4 x i1> %4 to <4 x i64>
- ret <4 x i64> %5
-}
-
-define <16 x i16> @test_pcmpeqw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_pcmpeqw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pcmpeqw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpeqw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpeqw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpeqw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pcmpeqw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = icmp eq <16 x i16> %a0, %a1
- %2 = sext <16 x i1> %1 to <16 x i16>
- %3 = load <16 x i16>, <16 x i16> *%a2, align 32
- %4 = icmp eq <16 x i16> %2, %3
- %5 = sext <16 x i1> %4 to <16 x i16>
- ret <16 x i16> %5
-}
-
-define <32 x i8> @test_pcmpgtb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_pcmpgtb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pcmpgtb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpgtb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpgtb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpgtb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pcmpgtb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = icmp sgt <32 x i8> %a0, %a1
- %2 = sext <32 x i1> %1 to <32 x i8>
- %3 = load <32 x i8>, <32 x i8> *%a2, align 32
- %4 = icmp sgt <32 x i8> %2, %3
- %5 = sext <32 x i1> %4 to <32 x i8>
- ret <32 x i8> %5
-}
-
-define <8 x i32> @test_pcmpgtd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_pcmpgtd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pcmpgtd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpgtd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpgtd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpgtd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pcmpgtd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpgtd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = icmp sgt <8 x i32> %a0, %a1
- %2 = sext <8 x i1> %1 to <8 x i32>
- %3 = load <8 x i32>, <8 x i32> *%a2, align 32
- %4 = icmp sgt <8 x i32> %2, %3
- %5 = sext <8 x i1> %4 to <8 x i32>
- ret <8 x i32> %5
-}
-
-define <4 x i64> @test_pcmpgtq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
-; GENERIC-LABEL: test_pcmpgtq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pcmpgtq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpgtq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpgtq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpgtq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pcmpgtq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = icmp sgt <4 x i64> %a0, %a1
- %2 = sext <4 x i1> %1 to <4 x i64>
- %3 = load <4 x i64>, <4 x i64> *%a2, align 32
- %4 = icmp sgt <4 x i64> %2, %3
- %5 = sext <4 x i1> %4 to <4 x i64>
- ret <4 x i64> %5
-}
-
-define <16 x i16> @test_pcmpgtw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_pcmpgtw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pcmpgtw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpgtw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpgtw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpgtw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pcmpgtw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = icmp sgt <16 x i16> %a0, %a1
- %2 = sext <16 x i1> %1 to <16 x i16>
- %3 = load <16 x i16>, <16 x i16> *%a2, align 32
- %4 = icmp sgt <16 x i16> %2, %3
- %5 = sext <16 x i1> %4 to <16 x i16>
- ret <16 x i16> %5
-}
-
-define <4 x i64> @test_perm2i128(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
-; GENERIC-LABEL: test_perm2i128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
-; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_perm2i128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; HASWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_perm2i128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; BROADWELL-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:1.00]
-; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_perm2i128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKYLAKE-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_perm2i128:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_perm2i128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [2:0.25]
-; ZNVER1-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [9:0.50]
-; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %2 = load <4 x i64>, <4 x i64> *%a2, align 32
- %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %4 = add <4 x i64> %1, %3
- ret <4 x i64> %4
-}
-
-define <8 x i32> @test_permd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_permd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_permd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; HASWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; HASWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_permd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; BROADWELL-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; BROADWELL-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_permd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SKYLAKE-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_permd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SKX-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_permd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpermd %ymm1, %ymm0, %ymm1 # sched: [2:0.25]
-; ZNVER1-NEXT: vpermd (%rdi), %ymm0, %ymm0 # sched: [9:0.50]
-; ZNVER1-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a1, <8 x i32> %a0)
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %2, <8 x i32> %a0)
- %4 = add <8 x i32> %1, %3
- ret <8 x i32> %4
-}
-declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
-
-define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) {
-; GENERIC-LABEL: test_permpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00]
-; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_permpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
-; HASWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
-; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_permpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
-; BROADWELL-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00]
-; BROADWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_permpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
-; SKYLAKE-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
-; SKYLAKE-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_permpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
-; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_permpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [107:0.50]
-; ZNVER1-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [100:0.25]
-; ZNVER1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
- %2 = load <4 x double>, <4 x double> *%a1, align 32
- %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
- %4 = fadd <4 x double> %1, %3
- ret <4 x double> %4
-}
-
-define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2) {
-; GENERIC-LABEL: test_permps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_permps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; HASWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_permps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; BROADWELL-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_permps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_permps:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_permps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [100:0.25]
-; ZNVER1-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [107:0.50]
-; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a1, <8 x i32> %a0)
- %2 = load <8 x float>, <8 x float> *%a2, align 32
- %3 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %2, <8 x i32> %a0)
- %4 = fadd <8 x float> %1, %3
- ret <8 x float> %4
-}
-declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly
-
-define <4 x i64> @test_permq(<4 x i64> %a0, <4 x i64> *%a1) {
-; GENERIC-LABEL: test_permq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [1:1.00]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [8:1.00]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_permq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
-; HASWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
-; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_permq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
-; BROADWELL-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:1.00]
-; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_permq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
-; SKYLAKE-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
-; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_permq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00]
-; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_permq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpermq {{.*#+}} ymm1 = mem[0,2,2,3] sched: [9:0.50]
-; ZNVER1-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [2:0.25]
-; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 2, i32 3>
- %2 = load <4 x i64>, <4 x i64> *%a1, align 32
- %3 = shufflevector <4 x i64> %2, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 2, i32 3>
- %4 = add <4 x i64> %1, %3
- ret <4 x i64> %4
-}
-
-define <4 x i32> @test_pgatherdd(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3) {
-; GENERIC-LABEL: test_pgatherdd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pgatherdd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pgatherdd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pgatherdd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pgatherdd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pgatherdd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3, i8 2)
- ret <4 x i32> %1
-}
-declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*, <4 x i32>, <4 x i32>, i8) nounwind readonly
-
-define <8 x i32> @test_pgatherdd_ymm(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3) {
-; GENERIC-LABEL: test_pgatherdd_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pgatherdd_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [27:6.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pgatherdd_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pgatherdd_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pgatherdd_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pgatherdd_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3, i8 2)
- ret <8 x i32> %1
-}
-declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*, <8 x i32>, <8 x i32>, i8) nounwind readonly
-
-define <2 x i64> @test_pgatherdq(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3) {
-; GENERIC-LABEL: test_pgatherdq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pgatherdq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [26:2.67]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pgatherdq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pgatherdq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pgatherdq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pgatherdq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3, i8 2)
- ret <2 x i64> %1
-}
-declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*, <4 x i32>, <2 x i64>, i8) nounwind readonly
-
-define <4 x i64> @test_pgatherdq_ymm(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3) {
-; GENERIC-LABEL: test_pgatherdq_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pgatherdq_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [27:4.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pgatherdq_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [5:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pgatherdq_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pgatherdq_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [25:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pgatherdq_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3, i8 2)
- ret <4 x i64> %1
-}
-declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*, <4 x i32>, <4 x i64>, i8) nounwind readonly
-
-define <4 x i32> @test_pgatherqd(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3) {
-; GENERIC-LABEL: test_pgatherqd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pgatherqd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [25:5.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pgatherqd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pgatherqd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pgatherqd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pgatherqd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3, i8 2)
- ret <4 x i32> %1
-}
-declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*, <2 x i64>, <4 x i32>, i8) nounwind readonly
-
-define <4 x i32> @test_pgatherqd_ymm(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3) {
-; GENERIC-LABEL: test_pgatherqd_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pgatherqd_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [28:5.00]
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pgatherqd_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pgatherqd_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pgatherqd_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [25:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pgatherqd_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3, i8 2)
- ret <4 x i32> %1
-}
-declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*, <4 x i64>, <4 x i32>, i8) nounwind readonly
-
-define <2 x i64> @test_pgatherqq(<2 x i64> %a0, i8 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
-; GENERIC-LABEL: test_pgatherqq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pgatherqq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [23:3.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pgatherqq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pgatherqq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pgatherqq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [22:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pgatherqq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0, i8* %a1, <2 x i64> %a2, <2 x i64> %a3, i8 2)
- ret <2 x i64> %1
-}
-declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*, <2 x i64>, <2 x i64>, i8) nounwind readonly
-
-define <4 x i64> @test_pgatherqq_ymm(<4 x i64> %a0, i8 *%a1, <4 x i64> %a2, <4 x i64> %a3) {
-; GENERIC-LABEL: test_pgatherqq_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pgatherqq_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [24:5.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pgatherqq_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pgatherqq_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pgatherqq_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [25:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pgatherqq_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0, i8* %a1, <4 x i64> %a2, <4 x i64> %a3, i8 2)
- ret <4 x i64> %1
-}
-declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*, <4 x i64>, <4 x i64>, i8) nounwind readonly
-
-define <8 x i32> @test_phaddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_phaddd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_phaddd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; HASWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phaddd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phaddd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phaddd:
-; SKX: # %bb.0:
-; SKX-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; SKX-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_phaddd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vphaddd %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vphaddd (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1)
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %1, <8 x i32> %2)
- ret <8 x i32> %3
-}
-declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone
-
-define <16 x i16> @test_phaddsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_phaddsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_phaddsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; HASWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phaddsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phaddsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phaddsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; SKX-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_phaddsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vphaddsw %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vphaddsw (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1)
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %1, <16 x i16> %2)
- ret <16 x i16> %3
-}
-declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <16 x i16> @test_phaddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_phaddw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_phaddw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; HASWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phaddw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phaddw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phaddw:
-; SKX: # %bb.0:
-; SKX-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; SKX-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_phaddw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vphaddw %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vphaddw (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1)
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %1, <16 x i16> %2)
- ret <16 x i16> %3
-}
-declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <8 x i32> @test_phsubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_phsubd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_phsubd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; HASWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phsubd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phsubd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phsubd:
-; SKX: # %bb.0:
-; SKX-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; SKX-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_phsubd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vphsubd %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vphsubd (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1)
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %1, <8 x i32> %2)
- ret <8 x i32> %3
-}
-declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone
-
-define <16 x i16> @test_phsubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_phsubsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_phsubsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; HASWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phsubsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phsubsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phsubsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; SKX-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_phsubsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vphsubsw %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vphsubsw (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1)
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %1, <16 x i16> %2)
- ret <16 x i16> %3
-}
-declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <16 x i16> @test_phsubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_phsubw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_phsubw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; HASWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phsubw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phsubw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phsubw:
-; SKX: # %bb.0:
-; SKX-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; SKX-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_phsubw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vphsubw %ymm1, %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vphsubw (%rdi), %ymm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1)
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %1, <16 x i16> %2)
- ret <16 x i16> %3
-}
-declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_pmaddubsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmaddubsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaddubsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaddubsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaddubsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmaddubsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1)
- %2 = bitcast <16 x i16> %1 to <32 x i8>
- %3 = load <32 x i8>, <32 x i8> *%a2, align 32
- %4 = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %2, <32 x i8> %3)
- ret <16 x i16> %4
-}
-declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
-
-define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_pmaddwd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmaddwd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaddwd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaddwd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaddwd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmaddwd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmaddwd (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1)
- %2 = bitcast <8 x i32> %1 to <16 x i16>
- %3 = load <16 x i16>, <16 x i16> *%a2, align 32
- %4 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %2, <16 x i16> %3)
- ret <8 x i32> %4
-}
-declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
-; GENERIC-LABEL: test_pmaskmovd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
-; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmaskmovd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
-; HASWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaskmovd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
-; BROADWELL-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaskmovd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaskmovd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
-; SKX-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmaskmovd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [100:0.25]
-; ZNVER1-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1)
- call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
- ret <4 x i32> %1
-}
-declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly
-declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind
-
-define <8 x i32> @test_pmaskmovd_ymm(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
-; GENERIC-LABEL: test_pmaskmovd_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
-; GENERIC-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmaskmovd_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [9:2.00]
-; HASWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
-; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaskmovd_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
-; BROADWELL-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
-; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaskmovd_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaskmovd_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
-; SKX-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
-; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmaskmovd_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaskmovd (%rdi), %ymm0, %ymm2 # sched: [100:0.25]
-; ZNVER1-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1)
- call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
- ret <8 x i32> %1
-}
-declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly
-declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
-
-define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
-; GENERIC-LABEL: test_pmaskmovq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
-; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmaskmovq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:2.00]
-; HASWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; HASWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaskmovq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:2.00]
-; BROADWELL-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00]
-; BROADWELL-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaskmovq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaskmovq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [7:0.50]
-; SKX-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKX-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmaskmovq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00]
-; ZNVER1-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1)
- call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
- ret <2 x i64> %1
-}
-declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly
-declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind
-
-define <4 x i64> @test_pmaskmovq_ymm(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
-; GENERIC-LABEL: test_pmaskmovq_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.00]
-; GENERIC-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmaskmovq_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:2.00]
-; HASWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
-; HASWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaskmovq_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:2.00]
-; BROADWELL-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [5:1.00]
-; BROADWELL-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaskmovq_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaskmovq_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [8:0.50]
-; SKX-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [2:1.00]
-; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmaskmovq_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaskmovq (%rdi), %ymm0, %ymm2 # sched: [9:1.50]
-; ZNVER1-NEXT: vpmaskmovq %ymm1, %ymm0, (%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: vmovdqa %ymm2, %ymm0 # sched: [2:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1)
- call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
- ret <4 x i64> %1
-}
-declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly
-declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind
-
-define <32 x i8> @test_pmaxsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_pmaxsb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmaxsb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaxsb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaxsb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaxsb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmaxsb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaxsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpmaxsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1)
- %2 = load <32 x i8>, <32 x i8> *%a2, align 32
- %3 = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %1, <32 x i8> %2)
- ret <32 x i8> %3
-}
-declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone
-
-define <8 x i32> @test_pmaxsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_pmaxsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmaxsd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaxsd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaxsd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaxsd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmaxsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaxsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpmaxsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1)
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %1, <8 x i32> %2)
- ret <8 x i32> %3
-}
-declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone
-
-define <16 x i16> @test_pmaxsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_pmaxsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmaxsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaxsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaxsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaxsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmaxsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaxsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpmaxsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1)
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %1, <16 x i16> %2)
- ret <16 x i16> %3
-}
-declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <32 x i8> @test_pmaxub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_pmaxub:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmaxub:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaxub:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaxub:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaxub:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmaxub:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaxub %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpmaxub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1)
- %2 = load <32 x i8>, <32 x i8> *%a2, align 32
- %3 = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %1, <32 x i8> %2)
- ret <32 x i8> %3
-}
-declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone
-
-define <8 x i32> @test_pmaxud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_pmaxud:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmaxud:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaxud:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaxud:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaxud:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmaxud:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpmaxud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1)
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %1, <8 x i32> %2)
- ret <8 x i32> %3
-}
-declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone
-
-define <16 x i16> @test_pmaxuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_pmaxuw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmaxuw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaxuw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaxuw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaxuw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmaxuw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaxuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpmaxuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1)
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %1, <16 x i16> %2)
- ret <16 x i16> %3
-}
-declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <32 x i8> @test_pminsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_pminsb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pminsb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pminsb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pminsb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pminsb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pminsb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpminsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpminsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1)
- %2 = load <32 x i8>, <32 x i8> *%a2, align 32
- %3 = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %1, <32 x i8> %2)
- ret <32 x i8> %3
-}
-declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone
-
-define <8 x i32> @test_pminsd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_pminsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pminsd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pminsd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pminsd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pminsd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pminsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpminsd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpminsd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1)
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %1, <8 x i32> %2)
- ret <8 x i32> %3
-}
-declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone
-
-define <16 x i16> @test_pminsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_pminsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pminsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pminsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pminsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pminsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pminsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpminsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpminsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1)
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %1, <16 x i16> %2)
- ret <16 x i16> %3
-}
-declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <32 x i8> @test_pminub(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_pminub:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pminub:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pminub:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pminub:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pminub:
-; SKX: # %bb.0:
-; SKX-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pminub:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpminub %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpminub (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1)
- %2 = load <32 x i8>, <32 x i8> *%a2, align 32
- %3 = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %1, <32 x i8> %2)
- ret <32 x i8> %3
-}
-declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
-
-define <8 x i32> @test_pminud(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_pminud:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pminud:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pminud:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pminud:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pminud:
-; SKX: # %bb.0:
-; SKX-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pminud:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpminud %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpminud (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1)
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %1, <8 x i32> %2)
- ret <8 x i32> %3
-}
-declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone
-
-define <16 x i16> @test_pminuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_pminuw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pminuw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pminuw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pminuw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pminuw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pminuw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpminuw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpminuw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1)
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %1, <16 x i16> %2)
- ret <16 x i16> %3
-}
-declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
-
-define i32 @test_pmovmskb(<32 x i8> %a0) {
-; GENERIC-LABEL: test_pmovmskb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmovmskb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00]
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovmskb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovmskb %ymm0, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovmskb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovmskb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovmskb %ymm0, %eax # sched: [2:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmovmskb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovmskb %ymm0, %eax # sched: [2:2.00]
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0)
- ret i32 %1
-}
-declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone
-
-define <8 x i32> @test_pmovsxbd(<16 x i8> %a0, <16 x i8> *%a1) {
-; GENERIC-LABEL: test_pmovsxbd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmovsxbd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
-; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovsxbd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
-; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovsxbd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
-; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovsxbd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:1.00]
-; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmovsxbd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovsxbd (%rdi), %ymm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpmovsxbd %xmm0, %ymm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %2 = sext <8 x i8> %1 to <8 x i32>
- %3 = load <16 x i8>, <16 x i8> *%a1, align 16
- %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %5 = sext <8 x i8> %4 to <8 x i32>
- %6 = add <8 x i32> %2, %5
- ret <8 x i32> %6
-}
-
-define <4 x i64> @test_pmovsxbq(<16 x i8> %a0, <16 x i8> *%a1) {
-; GENERIC-LABEL: test_pmovsxbq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmovsxbq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
-; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovsxbq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
-; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovsxbq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
-; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovsxbq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:1.00]
-; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmovsxbq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovsxbq (%rdi), %ymm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpmovsxbq %xmm0, %ymm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = sext <4 x i8> %1 to <4 x i64>
- %3 = load <16 x i8>, <16 x i8> *%a1, align 16
- %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %5 = sext <4 x i8> %4 to <4 x i64>
- %6 = add <4 x i64> %2, %5
- ret <4 x i64> %6
-}
-
-define <16 x i16> @test_pmovsxbw(<16 x i8> %a0, <16 x i8> *%a1) {
-; GENERIC-LABEL: test_pmovsxbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00]
-; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmovsxbw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
-; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovsxbw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:1.00]
-; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovsxbw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
-; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovsxbw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [9:1.00]
-; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmovsxbw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovsxbw (%rdi), %ymm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sext <16 x i8> %a0 to <16 x i16>
- %2 = load <16 x i8>, <16 x i8> *%a1, align 16
- %3 = sext <16 x i8> %2 to <16 x i16>
- %4 = add <16 x i16> %1, %3
- ret <16 x i16> %4
-}
-
-define <4 x i64> @test_pmovsxdq(<4 x i32> %a0, <4 x i32> *%a1) {
-; GENERIC-LABEL: test_pmovsxdq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmovsxdq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
-; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovsxdq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:1.00]
-; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovsxdq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
-; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovsxdq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [9:1.00]
-; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmovsxdq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovsxdq (%rdi), %ymm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sext <4 x i32> %a0 to <4 x i64>
- %2 = load <4 x i32>, <4 x i32> *%a1, align 16
- %3 = sext <4 x i32> %2 to <4 x i64>
- %4 = add <4 x i64> %1, %3
- ret <4 x i64> %4
-}
-
-define <8 x i32> @test_pmovsxwd(<8 x i16> %a0, <8 x i16> *%a1) {
-; GENERIC-LABEL: test_pmovsxwd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00]
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmovsxwd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
-; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovsxwd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:1.00]
-; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovsxwd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
-; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovsxwd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [9:1.00]
-; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmovsxwd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovsxwd (%rdi), %ymm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sext <8 x i16> %a0 to <8 x i32>
- %2 = load <8 x i16>, <8 x i16> *%a1, align 16
- %3 = sext <8 x i16> %2 to <8 x i32>
- %4 = add <8 x i32> %1, %3
- ret <8 x i32> %4
-}
-
-define <4 x i64> @test_pmovsxwq(<8 x i16> %a0, <8 x i16> *%a1) {
-; GENERIC-LABEL: test_pmovsxwq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmovsxwq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
-; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovsxwq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
-; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovsxwq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
-; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovsxwq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:1.00]
-; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmovsxwq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovsxwq (%rdi), %ymm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpmovsxwq %xmm0, %ymm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = sext <4 x i16> %1 to <4 x i64>
- %3 = load <8 x i16>, <8 x i16> *%a1, align 16
- %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %5 = sext <4 x i16> %4 to <4 x i64>
- %6 = add <4 x i64> %2, %5
- ret <4 x i64> %6
-}
-
-define <8 x i32> @test_pmovzxbd(<16 x i8> %a0, <16 x i8> *%a1) {
-; GENERIC-LABEL: test_pmovzxbd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00]
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmovzxbd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
-; HASWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
-; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovzxbd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
-; BROADWELL-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [9:1.00]
-; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovzxbd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
-; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
-; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovzxbd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
-; SKX-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
-; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmovzxbd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:0.50]
-; ZNVER1-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:0.50]
-; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %2 = zext <8 x i8> %1 to <8 x i32>
- %3 = load <16 x i8>, <16 x i8> *%a1, align 16
- %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %5 = zext <8 x i8> %4 to <8 x i32>
- %6 = add <8 x i32> %2, %5
- ret <8 x i32> %6
-}
-
-define <4 x i64> @test_pmovzxbq(<16 x i8> %a0, <16 x i8> *%a1) {
-; GENERIC-LABEL: test_pmovzxbq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmovzxbq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
-; HASWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
-; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovzxbq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
-; BROADWELL-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00]
-; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovzxbq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
-; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
-; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovzxbq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
-; SKX-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
-; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmovzxbq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
-; ZNVER1-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
-; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = zext <4 x i8> %1 to <4 x i64>
- %3 = load <16 x i8>, <16 x i8> *%a1, align 16
- %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %5 = zext <4 x i8> %4 to <4 x i64>
- %6 = add <4 x i64> %2, %5
- ret <4 x i64> %6
-}
-
-define <16 x i16> @test_pmovzxbw(<16 x i8> %a0, <16 x i8> *%a1) {
-; GENERIC-LABEL: test_pmovzxbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
-; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00]
-; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmovzxbw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
-; HASWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
-; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovzxbw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
-; BROADWELL-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [9:1.00]
-; BROADWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovzxbw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
-; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
-; SKYLAKE-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovzxbw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
-; SKX-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
-; SKX-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmovzxbw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:0.50]
-; ZNVER1-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:0.50]
-; ZNVER1-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = zext <16 x i8> %a0 to <16 x i16>
- %2 = load <16 x i8>, <16 x i8> *%a1, align 16
- %3 = zext <16 x i8> %2 to <16 x i16>
- %4 = add <16 x i16> %1, %3
- ret <16 x i16> %4
-}
-
-define <4 x i64> @test_pmovzxdq(<4 x i32> %a0, <4 x i32> *%a1) {
-; GENERIC-LABEL: test_pmovzxdq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
-; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmovzxdq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
-; HASWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
-; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovzxdq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
-; BROADWELL-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00]
-; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovzxdq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
-; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
-; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovzxdq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
-; SKX-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
-; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmovzxdq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
-; ZNVER1-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
-; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = zext <4 x i32> %a0 to <4 x i64>
- %2 = load <4 x i32>, <4 x i32> *%a1, align 16
- %3 = zext <4 x i32> %2 to <4 x i64>
- %4 = add <4 x i64> %1, %3
- ret <4 x i64> %4
-}
-
-define <8 x i32> @test_pmovzxwd(<8 x i16> %a0, <8 x i16> *%a1) {
-; GENERIC-LABEL: test_pmovzxwd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmovzxwd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
-; HASWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
-; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovzxwd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
-; BROADWELL-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
-; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovzxwd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
-; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
-; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovzxwd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
-; SKX-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
-; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmovzxwd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
-; ZNVER1-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
-; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = zext <8 x i16> %a0 to <8 x i32>
- %2 = load <8 x i16>, <8 x i16> *%a1, align 16
- %3 = zext <8 x i16> %2 to <8 x i32>
- %4 = add <8 x i32> %1, %3
- ret <8 x i32> %4
-}
-
-define <4 x i64> @test_pmovzxwq(<8 x i16> %a0, <8 x i16> *%a1) {
-; GENERIC-LABEL: test_pmovzxwq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmovzxwq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
-; HASWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
-; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovzxwq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
-; BROADWELL-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00]
-; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovzxwq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
-; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
-; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovzxwq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
-; SKX-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
-; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmovzxwq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
-; ZNVER1-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
-; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = zext <4 x i16> %1 to <4 x i64>
- %3 = load <8 x i16>, <8 x i16> *%a1, align 16
- %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %5 = zext <4 x i16> %4 to <4 x i64>
- %6 = add <4 x i64> %2, %5
- ret <4 x i64> %6
-}
-
-define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2, <8 x i32> *%a3) {
-; GENERIC-LABEL: test_pmuldq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [12:1.00]
-; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmuldq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [12:1.00]
-; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmuldq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [11:1.00]
-; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmuldq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [11:0.50]
-; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmuldq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vpmuldq (%rdi), %ymm2, %ymm1 # sched: [11:0.50]
-; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmuldq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmuldq (%rdi), %ymm2, %ymm2 # sched: [11:1.00]
-; ZNVER1-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpor %ymm2, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1)
- %2 = load <8 x i32>, <8 x i32> *%a3, align 32
- %3 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a2, <8 x i32> %2)
- %4 = or <4 x i64> %1, %3
- ret <4 x i64> %4
-}
-declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
-
-define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_pmulhrsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmulhrsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmulhrsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmulhrsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmulhrsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmulhrsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1)
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %1, <16 x i16> %2)
- ret <16 x i16> %3
-}
-declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_pmulhuw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmulhuw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmulhuw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmulhuw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmulhuw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmulhuw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1)
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %1, <16 x i16> %2)
- ret <16 x i16> %3
-}
-declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_pmulhw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmulhw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmulhw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmulhw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmulhw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmulhw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1)
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %1, <16 x i16> %2)
- ret <16 x i16> %3
-}
-declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_pmulld:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmulld:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
-; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmulld:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00]
-; BROADWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [16:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmulld:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; SKYLAKE-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmulld:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmulld:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:2.00]
-; ZNVER1-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [12:2.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = mul <8 x i32> %a0, %a1
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = mul <8 x i32> %1, %2
- ret <8 x i32> %3
-}
-
-define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_pmullw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmullw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmullw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmullw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmullw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmullw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = mul <16 x i16> %a0, %a1
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = mul <16 x i16> %1, %2
- ret <16 x i16> %3
-}
-
-define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_pmuludq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmuludq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmuludq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmuludq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmuludq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pmuludq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> %a1)
- %2 = bitcast <4 x i64> %1 to <8 x i32>
- %3 = load <8 x i32>, <8 x i32> *%a2, align 32
- %4 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %2, <8 x i32> %3)
- ret <4 x i64> %4
-}
-declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
-
-define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
-; GENERIC-LABEL: test_por:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_por:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_por:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; BROADWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_por:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_por:
-; SKX: # %bb.0:
-; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_por:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = or <4 x i64> %a0, %a1
- %2 = load <4 x i64>, <4 x i64> *%a2, align 32
- %3 = or <4 x i64> %1, %2
- %4 = add <4 x i64> %3, %a1
- ret <4 x i64> %4
-}
-
-define <4 x i64> @test_psadbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_psadbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psadbw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [12:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psadbw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psadbw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psadbw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psadbw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsadbw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vpsadbw (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1)
- %2 = bitcast <4 x i64> %1 to <32 x i8>
- %3 = load <32 x i8>, <32 x i8> *%a2, align 32
- %4 = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %2, <32 x i8> %3)
- ret <4 x i64> %4
-}
-declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
-
-define <32 x i8> @test_pshufb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_pshufb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pshufb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pshufb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pshufb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pshufb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; SKX-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pshufb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpshufb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpshufb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1)
- %2 = load <32 x i8>, <32 x i8> *%a2, align 32
- %3 = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %1, <32 x i8> %2)
- ret <32 x i8> %3
-}
-declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
-
-define <8 x i32> @test_pshufd(<8 x i32> %a0, <8 x i32> *%a1) {
-; GENERIC-LABEL: test_pshufd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
-; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pshufd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
-; HASWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
-; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pshufd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
-; BROADWELL-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [7:1.00]
-; BROADWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pshufd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
-; SKYLAKE-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
-; SKYLAKE-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pshufd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:1.00]
-; SKX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pshufd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpshufd {{.*#+}} ymm1 = mem[1,0,3,2,5,4,7,6] sched: [8:0.50]
-; ZNVER1-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:0.25]
-; ZNVER1-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
- %2 = load <8 x i32>, <8 x i32> *%a1, align 32
- %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
- %4 = add <8 x i32> %1, %3
- ret <8 x i32> %4
-}
-
-define <16 x i16> @test_pshufhw(<16 x i16> %a0, <16 x i16> *%a1) {
-; GENERIC-LABEL: test_pshufhw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
-; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pshufhw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
-; HASWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
-; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pshufhw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
-; BROADWELL-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [7:1.00]
-; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pshufhw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
-; SKYLAKE-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
-; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pshufhw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:1.00]
-; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pshufhw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm1 = mem[0,1,2,3,5,4,7,6,8,9,10,11,13,12,15,14] sched: [8:0.50]
-; ZNVER1-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,6,5,4,8,9,10,11,15,14,13,12] sched: [1:0.25]
-; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4, i32 8, i32 9, i32 10, i32 11, i32 15, i32 14, i32 13, i32 12>
- %2 = load <16 x i16>, <16 x i16> *%a1, align 32
- %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 12, i32 15, i32 14>
- %4 = or <16 x i16> %1, %3
- ret <16 x i16> %4
-}
-
-define <16 x i16> @test_pshuflw(<16 x i16> %a0, <16 x i16> *%a1) {
-; GENERIC-LABEL: test_pshuflw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
-; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pshuflw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
-; HASWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
-; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pshuflw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
-; BROADWELL-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [7:1.00]
-; BROADWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pshuflw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
-; SKYLAKE-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
-; SKYLAKE-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pshuflw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:1.00]
-; SKX-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pshuflw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm1 = mem[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15] sched: [8:0.50]
-; ZNVER1-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7,11,10,9,8,12,13,14,15] sched: [1:0.25]
-; ZNVER1-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15>
- %2 = load <16 x i16>, <16 x i16> *%a1, align 32
- %3 = shufflevector <16 x i16> %2, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
- %4 = or <16 x i16> %1, %3
- ret <16 x i16> %4
-}
-
-define <32 x i8> @test_psignb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_psignb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psignb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psignb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psignb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psignb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psignb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsignb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsignb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1)
- %2 = load <32 x i8>, <32 x i8> *%a2, align 32
- %3 = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %1, <32 x i8> %2)
- ret <32 x i8> %3
-}
-declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
-
-define <8 x i32> @test_psignd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_psignd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psignd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psignd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psignd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psignd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psignd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsignd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsignd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1)
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %1, <8 x i32> %2)
- ret <8 x i32> %3
-}
-declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
-
-define <16 x i16> @test_psignw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_psignw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psignw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psignw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psignw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psignw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psignw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsignw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsignw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1)
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %1, <16 x i16> %2)
- ret <16 x i16> %3
-}
-declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <8 x i32> @test_pslld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_pslld:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; GENERIC-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pslld:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; HASWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pslld:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pslld:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pslld:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; SKX-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pslld:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpslld %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
-; ZNVER1-NEXT: vpslld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; ZNVER1-NEXT: vpslld $2, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %1, <4 x i32> %2)
- %4 = shl <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
- ret <8 x i32> %4
-}
-declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone
-
-define <32 x i8> @test_pslldq(<32 x i8> %a0) {
-; GENERIC-LABEL: test_pslldq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pslldq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pslldq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pslldq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pslldq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pslldq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,ymm0[0,1,2,3,4,5,6,7,8,9,10,11,12],zero,zero,zero,ymm0[16,17,18,19,20,21,22,23,24,25,26,27,28] sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <32 x i8> zeroinitializer, <32 x i8> %a0, <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60>
- ret <32 x i8> %1
-}
-
-define <4 x i64> @test_psllq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_psllq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; GENERIC-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psllq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; HASWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psllq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psllq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psllq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; SKX-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psllq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsllq %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
-; ZNVER1-NEXT: vpsllq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; ZNVER1-NEXT: vpsllq $2, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1)
- %2 = load <2 x i64>, <2 x i64> *%a2, align 16
- %3 = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %1, <2 x i64> %2)
- %4 = shl <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2>
- ret <4 x i64> %4
-}
-declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone
-
-define <4 x i32> @test_psllvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_psllvd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psllvd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; HASWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psllvd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psllvd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psllvd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psllvd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpsllvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %1, <4 x i32> %2)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <8 x i32> @test_psllvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_psllvd_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psllvd_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; HASWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psllvd_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psllvd_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psllvd_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psllvd_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsllvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpsllvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1)
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %1, <8 x i32> %2)
- ret <8 x i32> %3
-}
-declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
-
-define <2 x i64> @test_psllvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_psllvq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psllvq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psllvq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psllvq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psllvq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psllvq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpsllvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1)
- %2 = load <2 x i64>, <2 x i64> *%a2, align 16
- %3 = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %1, <2 x i64> %2)
- ret <2 x i64> %3
-}
-declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
-
-define <4 x i64> @test_psllvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
-; GENERIC-LABEL: test_psllvq_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psllvq_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psllvq_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psllvq_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psllvq_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psllvq_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpsllvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1)
- %2 = load <4 x i64>, <4 x i64> *%a2, align 32
- %3 = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %1, <4 x i64> %2)
- ret <4 x i64> %3
-}
-declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
-
-define <16 x i16> @test_psllw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_psllw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; GENERIC-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psllw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; HASWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psllw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psllw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psllw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; SKX-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psllw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsllw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
-; ZNVER1-NEXT: vpsllw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; ZNVER1-NEXT: vpsllw $2, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %1, <8 x i16> %2)
- %4 = shl <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
- ret <16 x i16> %4
-}
-declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i32> @test_psrad(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_psrad:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; GENERIC-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psrad:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; HASWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrad:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrad:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrad:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; SKX-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psrad:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsrad %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
-; ZNVER1-NEXT: vpsrad (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; ZNVER1-NEXT: vpsrad $2, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> %2)
- %4 = ashr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
- ret <8 x i32> %4
-}
-declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone
-
-define <4 x i32> @test_psravd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_psravd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psravd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; HASWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psravd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psravd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psravd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psravd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpsravd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %1, <4 x i32> %2)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <8 x i32> @test_psravd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_psravd_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psravd_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; HASWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psravd_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psravd_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psravd_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psravd_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpsravd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1)
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %1, <8 x i32> %2)
- ret <8 x i32> %3
-}
-declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
-
-define <16 x i16> @test_psraw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_psraw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; GENERIC-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psraw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; HASWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psraw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psraw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psraw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; SKX-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psraw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsraw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
-; ZNVER1-NEXT: vpsraw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; ZNVER1-NEXT: vpsraw $2, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> %2)
- %4 = ashr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
- ret <16 x i16> %4
-}
-declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i32> @test_psrld(<8 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_psrld:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; GENERIC-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psrld:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; HASWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrld:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrld:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrld:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; SKX-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psrld:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsrld %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
-; ZNVER1-NEXT: vpsrld (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; ZNVER1-NEXT: vpsrld $2, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %1, <4 x i32> %2)
- %4 = lshr <8 x i32> %3, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
- ret <8 x i32> %4
-}
-declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone
-
-define <32 x i8> @test_psrldq(<32 x i8> %a0) {
-; GENERIC-LABEL: test_psrldq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psrldq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrldq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrldq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrldq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psrldq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsrldq {{.*#+}} ymm0 = ymm0[3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,ymm0[19,20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50>
- ret <32 x i8> %1
-}
-
-define <4 x i64> @test_psrlq(<4 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_psrlq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; GENERIC-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psrlq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; HASWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrlq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrlq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrlq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; SKX-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psrlq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsrlq %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
-; ZNVER1-NEXT: vpsrlq (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; ZNVER1-NEXT: vpsrlq $2, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1)
- %2 = load <2 x i64>, <2 x i64> *%a2, align 16
- %3 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %1, <2 x i64> %2)
- %4 = lshr <4 x i64> %3, <i64 2, i64 2, i64 2, i64 2>
- ret <4 x i64> %4
-}
-declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone
-
-define <4 x i32> @test_psrlvd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_psrlvd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psrlvd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; HASWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrlvd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrlvd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrlvd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psrlvd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpsrlvd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %1, <4 x i32> %2)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <8 x i32> @test_psrlvd_ymm(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_psrlvd_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psrlvd_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; HASWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [10:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrlvd_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [9:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrlvd_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrlvd_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psrlvd_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpsrlvd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1)
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %1, <8 x i32> %2)
- ret <8 x i32> %3
-}
-declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
-
-define <2 x i64> @test_psrlvq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_psrlvq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psrlvq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrlvq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrlvq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrlvq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psrlvq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpsrlvq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1)
- %2 = load <2 x i64>, <2 x i64> *%a2, align 16
- %3 = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %1, <2 x i64> %2)
- ret <2 x i64> %3
-}
-declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
-
-define <4 x i64> @test_psrlvq_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
-; GENERIC-LABEL: test_psrlvq_ymm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psrlvq_ymm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrlvq_ymm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrlvq_ymm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrlvq_ymm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psrlvq_ymm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpsrlvq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1)
- %2 = load <4 x i64>, <4 x i64> *%a2, align 32
- %3 = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %1, <4 x i64> %2)
- ret <4 x i64> %3
-}
-declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
-
-define <16 x i16> @test_psrlw(<16 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_psrlw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [11:1.00]
-; GENERIC-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psrlw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; HASWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; HASWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrlw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrlw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrlw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [4:1.00]
-; SKX-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psrlw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 # sched: [2:1.00]
-; ZNVER1-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 # sched: [9:1.00]
-; ZNVER1-NEXT: vpsrlw $2, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %1, <8 x i16> %2)
- %4 = lshr <16 x i16> %3, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
- ret <16 x i16> %4
-}
-declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone
-
-define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_psubb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psubb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psubb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sub <32 x i8> %a0, %a1
- %2 = load <32 x i8>, <32 x i8> *%a2, align 32
- %3 = sub <32 x i8> %1, %2
- ret <32 x i8> %3
-}
-
-define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_psubd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psubd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psubd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sub <8 x i32> %a0, %a1
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = sub <8 x i32> %1, %2
- ret <8 x i32> %3
-}
-
-define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
-; GENERIC-LABEL: test_psubq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psubq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psubq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sub <4 x i64> %a0, %a1
- %2 = load <4 x i64>, <4 x i64> *%a2, align 32
- %3 = sub <4 x i64> %1, %2
- ret <4 x i64> %3
-}
-
-define <32 x i8> @test_psubsb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_psubsb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psubsb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubsb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubsb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubsb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psubsb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1)
- %2 = load <32 x i8>, <32 x i8> *%a2, align 32
- %3 = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %1, <32 x i8> %2)
- ret <32 x i8> %3
-}
-declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
-
-define <16 x i16> @test_psubsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_psubsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psubsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psubsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1)
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %1, <16 x i16> %2)
- ret <16 x i16> %3
-}
-declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <32 x i8> @test_psubusb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_psubusb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psubusb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubusb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubusb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubusb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psubusb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsubusb %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubusb (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1)
- %2 = load <32 x i8>, <32 x i8> *%a2, align 32
- %3 = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %1, <32 x i8> %2)
- ret <32 x i8> %3
-}
-declare <32 x i8> @llvm.usub.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
-
-define <16 x i16> @test_psubusw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_psubusw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psubusw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubusw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubusw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubusw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psubusw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsubusw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubusw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1)
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %1, <16 x i16> %2)
- ret <16 x i16> %3
-}
-declare <16 x i16> @llvm.usub.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
-
-define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_psubw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psubw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_psubw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sub <16 x i16> %a0, %a1
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = sub <16 x i16> %1, %2
- ret <16 x i16> %3
-}
-
-define <32 x i8> @test_punpckhbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_punpckhbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
-; GENERIC-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_punpckhbw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
-; HASWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckhbw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
-; BROADWELL-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckhbw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
-; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckhbw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:1.00]
-; SKX-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_punpckhbw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] sched: [1:0.25]
-; ZNVER1-NEXT: vpunpckhbw {{.*#+}} ymm0 = ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15],ymm0[24],mem[24],ymm0[25],mem[25],ymm0[26],mem[26],ymm0[27],mem[27],ymm0[28],mem[28],ymm0[29],mem[29],ymm0[30],mem[30],ymm0[31],mem[31] sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
- %2 = load <32 x i8>, <32 x i8> *%a2, align 32
- %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
- ret <32 x i8> %3
-}
-
-define <8 x i32> @test_punpckhdq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_punpckhdq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; GENERIC-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
-; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_punpckhdq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; HASWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckhdq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; BROADWELL-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [7:1.00]
-; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckhdq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckhdq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; SKX-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
-; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_punpckhdq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:0.25]
-; ZNVER1-NEXT: vpunpckhdq {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:0.50]
-; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- ret <8 x i32> %4
-}
-
-define <4 x i64> @test_punpckhqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
-; GENERIC-LABEL: test_punpckhqdq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; GENERIC-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_punpckhqdq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; HASWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckhqdq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [7:1.00]
-; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckhqdq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckhqdq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; SKX-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_punpckhqdq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm1 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:0.25]
-; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:0.50]
-; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %2 = load <4 x i64>, <4 x i64> *%a2, align 32
- %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %4 = add <4 x i64> %1, %3
- ret <4 x i64> %4
-}
-
-define <16 x i16> @test_punpckhwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_punpckhwd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
-; GENERIC-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_punpckhwd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
-; HASWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckhwd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
-; BROADWELL-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckhwd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
-; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckhwd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:1.00]
-; SKX-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_punpckhwd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15] sched: [1:0.25]
-; ZNVER1-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[12],mem[12],ymm0[13],mem[13],ymm0[14],mem[14],ymm0[15],mem[15] sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
- ret <16 x i16> %3
-}
-
-define <32 x i8> @test_punpcklbw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) {
-; GENERIC-LABEL: test_punpcklbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
-; GENERIC-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_punpcklbw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
-; HASWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpcklbw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
-; BROADWELL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpcklbw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
-; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpcklbw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:1.00]
-; SKX-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_punpcklbw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] sched: [1:0.25]
-; ZNVER1-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[4],mem[4],ymm0[5],mem[5],ymm0[6],mem[6],ymm0[7],mem[7],ymm0[16],mem[16],ymm0[17],mem[17],ymm0[18],mem[18],ymm0[19],mem[19],ymm0[20],mem[20],ymm0[21],mem[21],ymm0[22],mem[22],ymm0[23],mem[23] sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <32 x i8> %a0, <32 x i8> %a1, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
- %2 = load <32 x i8>, <32 x i8> *%a2, align 32
- %3 = shufflevector <32 x i8> %1, <32 x i8> %2, <32 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55>
- ret <32 x i8> %3
-}
-
-define <8 x i32> @test_punpckldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) {
-; GENERIC-LABEL: test_punpckldq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; GENERIC-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; GENERIC-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
-; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_punpckldq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; HASWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; HASWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckldq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; BROADWELL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [7:1.00]
-; BROADWELL-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckldq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; SKYLAKE-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; SKYLAKE-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckldq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; SKX-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; SKX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.50]
-; SKX-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_punpckldq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:0.25]
-; ZNVER1-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:0.50]
-; ZNVER1-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %2 = load <8 x i32>, <8 x i32> *%a2, align 32
- %3 = shufflevector <8 x i32> %1, <8 x i32> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- ret <8 x i32> %4
-}
-
-define <4 x i64> @test_punpcklqdq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
-; GENERIC-LABEL: test_punpcklqdq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; GENERIC-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; GENERIC-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_punpcklqdq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; HASWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; HASWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpcklqdq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [7:1.00]
-; BROADWELL-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpcklqdq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; SKYLAKE-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpcklqdq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; SKX-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; SKX-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_punpcklqdq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm1 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:0.25]
-; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:0.50]
-; ZNVER1-NEXT: vpaddq %ymm0, %ymm1, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x i64> %a0, <4 x i64> %a1, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %2 = load <4 x i64>, <4 x i64> *%a2, align 32
- %3 = shufflevector <4 x i64> %a0, <4 x i64> %2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %4 = add <4 x i64> %1, %3
- ret <4 x i64> %4
-}
-
-define <16 x i16> @test_punpcklwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) {
-; GENERIC-LABEL: test_punpcklwd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
-; GENERIC-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_punpcklwd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
-; HASWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpcklwd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
-; BROADWELL-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpcklwd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
-; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpcklwd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:1.00]
-; SKX-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_punpcklwd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11] sched: [1:0.25]
-; ZNVER1-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[2],mem[2],ymm0[3],mem[3],ymm0[8],mem[8],ymm0[9],mem[9],ymm0[10],mem[10],ymm0[11],mem[11] sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
- %2 = load <16 x i16>, <16 x i16> *%a2, align 32
- %3 = shufflevector <16 x i16> %1, <16 x i16> %2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
- ret <16 x i16> %3
-}
-
-define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) {
-; GENERIC-LABEL: test_pxor:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pxor:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pxor:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; BROADWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [7:0.50]
-; BROADWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pxor:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKYLAKE-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pxor:
-; SKX: # %bb.0:
-; SKX-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pxor:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = xor <4 x i64> %a0, %a1
- %2 = load <4 x i64>, <4 x i64> *%a2, align 32
- %3 = xor <4 x i64> %1, %2
- %4 = add <4 x i64> %3, %a1
- ret <4 x i64> %4
-}
-
-!0 = !{i32 1}
diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll
deleted file mode 100755
index bc9e6f761f4..00000000000
--- a/llvm/test/CodeGen/X86/avx512-schedule.ll
+++ /dev/null
@@ -1,8762 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx512f,+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX
-
-; This test is an assembly of avx512 instructions to check their scheduling
-
-define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
-; GENERIC-LABEL: addpd512:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: addpd512:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %add.i = fadd <8 x double> %x, %y
- ret <8 x double> %add.i
-}
-
-define <8 x double> @addpd512fold(<8 x double> %y) {
-; GENERIC-LABEL: addpd512fold:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: addpd512fold:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
- ret <8 x double> %add.i
-}
-
-define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
-; GENERIC-LABEL: addps512:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: addps512:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %add.i = fadd <16 x float> %x, %y
- ret <16 x float> %add.i
-}
-
-define <16 x float> @addps512fold(<16 x float> %y) {
-; GENERIC-LABEL: addps512fold:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: addps512fold:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
- ret <16 x float> %add.i
-}
-
-define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
-; GENERIC-LABEL: subpd512:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: subpd512:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %sub.i = fsub <8 x double> %x, %y
- ret <8 x double> %sub.i
-}
-
-define <8 x double> @subpd512fold(<8 x double> %y, <8 x double>* %x) {
-; GENERIC-LABEL: subpd512fold:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: subpd512fold:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vsubpd (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %tmp2 = load <8 x double>, <8 x double>* %x, align 8
- %sub.i = fsub <8 x double> %y, %tmp2
- ret <8 x double> %sub.i
-}
-
-define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
-; GENERIC-LABEL: subps512:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: subps512:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %sub.i = fsub <16 x float> %x, %y
- ret <16 x float> %sub.i
-}
-
-define <16 x float> @subps512fold(<16 x float> %y, <16 x float>* %x) {
-; GENERIC-LABEL: subps512fold:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: subps512fold:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vsubps (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %tmp2 = load <16 x float>, <16 x float>* %x, align 4
- %sub.i = fsub <16 x float> %y, %tmp2
- ret <16 x float> %sub.i
-}
-
-define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
-; GENERIC-LABEL: imulq512:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: imulq512:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [12:1.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %z = mul <8 x i64>%x, %y
- ret <8 x i64>%z
-}
-
-define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
-; GENERIC-LABEL: imulq256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: imulq256:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [12:1.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %z = mul <4 x i64>%x, %y
- ret <4 x i64>%z
-}
-
-define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
-; GENERIC-LABEL: imulq128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: imulq128:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [12:1.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %z = mul <2 x i64>%x, %y
- ret <2 x i64>%z
-}
-
-define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
-; GENERIC-LABEL: mulpd512:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mulpd512:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %mul.i = fmul <8 x double> %x, %y
- ret <8 x double> %mul.i
-}
-
-define <8 x double> @mulpd512fold(<8 x double> %y) {
-; GENERIC-LABEL: mulpd512fold:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mulpd512fold:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vmulpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
- ret <8 x double> %mul.i
-}
-
-define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
-; GENERIC-LABEL: mulps512:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mulps512:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %mul.i = fmul <16 x float> %x, %y
- ret <16 x float> %mul.i
-}
-
-define <16 x float> @mulps512fold(<16 x float> %y) {
-; GENERIC-LABEL: mulps512fold:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mulps512fold:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
- ret <16 x float> %mul.i
-}
-
-define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
-; GENERIC-LABEL: divpd512:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [45:44.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: divpd512:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vdivpd %zmm0, %zmm1, %zmm0 # sched: [23:16.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %div.i = fdiv <8 x double> %x, %y
- ret <8 x double> %div.i
-}
-
-define <8 x double> @divpd512fold(<8 x double> %y) {
-; GENERIC-LABEL: divpd512fold:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [52:44.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: divpd512fold:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vdivpd {{.*}}(%rip), %zmm0, %zmm0 # sched: [30:16.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
- ret <8 x double> %div.i
-}
-
-define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
-; GENERIC-LABEL: divps512:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [29:28.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: divps512:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [18:10.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %div.i = fdiv <16 x float> %x, %y
- ret <16 x float> %div.i
-}
-
-define <16 x float> @divps512fold(<16 x float> %y) {
-; GENERIC-LABEL: divps512fold:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [36:28.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: divps512fold:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vdivps {{.*}}(%rip), %zmm0, %zmm0 # sched: [25:10.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
- ret <16 x float> %div.i
-}
-
-define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
-; GENERIC-LABEL: vpaddq_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpaddq_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = add <8 x i64> %i, %j
- ret <8 x i64> %x
-}
-
-define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, <8 x i64>* %j) nounwind {
-; GENERIC-LABEL: vpaddq_fold_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpaddq_fold_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddq (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %tmp = load <8 x i64>, <8 x i64>* %j, align 4
- %x = add <8 x i64> %i, %tmp
- ret <8 x i64> %x
-}
-
-define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
-; GENERIC-LABEL: vpaddq_broadcast_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpaddq_broadcast_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = add <8 x i64> %i, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
- ret <8 x i64> %x
-}
-
-define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, i64* %j) nounwind {
-; GENERIC-LABEL: vpaddq_broadcast2_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpaddq_broadcast2_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddq (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %tmp = load i64, i64* %j
- %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
- %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
- %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
- %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
- %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
- %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
- %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
- %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
- %x = add <8 x i64> %i, %j.7
- ret <8 x i64> %x
-}
-
-define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
-; GENERIC-LABEL: vpaddd_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpaddd_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = add <16 x i32> %i, %j
- ret <16 x i32> %x
-}
-
-define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, <16 x i32>* %j) nounwind {
-; GENERIC-LABEL: vpaddd_fold_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpaddd_fold_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %tmp = load <16 x i32>, <16 x i32>* %j, align 4
- %x = add <16 x i32> %i, %tmp
- ret <16 x i32> %x
-}
-
-define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
-; GENERIC-LABEL: vpaddd_broadcast_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpaddd_broadcast_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = add <16 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
- ret <16 x i32> %x
-}
-
-define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
-; GENERIC-LABEL: vpaddd_mask_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpaddd_mask_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %x = add <16 x i32> %i, %j
- %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
- ret <16 x i32> %r
-}
-
-define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
-; GENERIC-LABEL: vpaddd_maskz_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpaddd_maskz_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %x = add <16 x i32> %i, %j
- %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %r
-}
-
-define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
-; GENERIC-LABEL: vpaddd_mask_fold_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpaddd_mask_fold_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %j = load <16 x i32>, <16 x i32>* %j.ptr
- %x = add <16 x i32> %i, %j
- %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
- ret <16 x i32> %r
-}
-
-define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
-; GENERIC-LABEL: vpaddd_mask_broadcast_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpaddd_mask_broadcast_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %x = add <16 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
- %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
- ret <16 x i32> %r
-}
-
-define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, <16 x i32>* %j.ptr, <16 x i32> %mask1) nounwind readnone {
-; GENERIC-LABEL: vpaddd_maskz_fold_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpaddd_maskz_fold_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %j = load <16 x i32>, <16 x i32>* %j.ptr
- %x = add <16 x i32> %i, %j
- %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %r
-}
-
-define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
-; GENERIC-LABEL: vpaddd_maskz_broadcast_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpaddd_maskz_broadcast_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %x = add <16 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
- %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %r
-}
-
-define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
-; GENERIC-LABEL: vpsubq_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpsubq_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = sub <8 x i64> %i, %j
- ret <8 x i64> %x
-}
-
-define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
-; GENERIC-LABEL: vpsubd_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpsubd_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubd %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = sub <16 x i32> %i, %j
- ret <16 x i32> %x
-}
-
-define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
-; GENERIC-LABEL: vpmulld_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpmulld_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = mul <16 x i32> %i, %j
- ret <16 x i32> %x
-}
-
-declare float @sqrtf(float) readnone
-define float @sqrtA(float %a) nounwind uwtable readnone ssp {
-; GENERIC-LABEL: sqrtA:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sqrtA:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %conv1 = tail call float @sqrtf(float %a) nounwind readnone
- ret float %conv1
-}
-
-declare double @sqrt(double) readnone
-define double @sqrtB(double %a) nounwind uwtable readnone ssp {
-; GENERIC-LABEL: sqrtB:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:21.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sqrtB:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %call = tail call double @sqrt(double %a) nounwind readnone
- ret double %call
-}
-
-declare float @llvm.sqrt.f32(float)
-define float @sqrtC(float %a) nounwind {
-; GENERIC-LABEL: sqrtC:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sqrtC:
-; SKX: # %bb.0:
-; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = call float @llvm.sqrt.f32(float %a)
- ret float %b
-}
-
-declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
-define <16 x float> @sqrtD(<16 x float> %a) nounwind {
-; GENERIC-LABEL: sqrtD:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vsqrtps %zmm0, %zmm0 # sched: [29:28.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sqrtD:
-; SKX: # %bb.0:
-; SKX-NEXT: vsqrtps %zmm0, %zmm0 # sched: [20:12.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
- ret <16 x float> %b
-}
-
-declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
-define <8 x double> @sqrtE(<8 x double> %a) nounwind {
-; GENERIC-LABEL: sqrtE:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [45:44.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sqrtE:
-; SKX: # %bb.0:
-; SKX-NEXT: vsqrtpd %zmm0, %zmm0 # sched: [32:24.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
- ret <8 x double> %b
-}
-
-define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
-; GENERIC-LABEL: fadd_broadcast:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: fadd_broadcast:
-; SKX: # %bb.0:
-; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
- ret <16 x float> %b
-}
-
-define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
-; GENERIC-LABEL: addq_broadcast:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: addq_broadcast:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
- ret <8 x i64> %b
-}
-
-define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
-; GENERIC-LABEL: orq_broadcast:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: orq_broadcast:
-; SKX: # %bb.0:
-; SKX-NEXT: vorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
- ret <8 x i64> %b
-}
-
-define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
-; GENERIC-LABEL: andd512fold:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: andd512fold:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %a = load <16 x i32>, <16 x i32>* %x, align 4
- %b = and <16 x i32> %y, %a
- ret <16 x i32> %b
-}
-
-define <8 x i64> @andqbrst(<8 x i64> %p1, i64* %ap) {
-; GENERIC-LABEL: andqbrst:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: andqbrst:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vandpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %a = load i64, i64* %ap, align 8
- %b = insertelement <8 x i64> undef, i64 %a, i32 0
- %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
- %d = and <8 x i64> %p1, %c
- ret <8 x i64>%d
-}
-
-define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
-; GENERIC-LABEL: test_mask_vaddps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mask_vaddps:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- <16 x float> %j, <16 x i32> %mask1)
- nounwind readnone {
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %x = fadd <16 x float> %i, %j
- %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
- ret <16 x float> %r
-}
-
-define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
-; GENERIC-LABEL: test_mask_vmulps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mask_vmulps:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %x = fmul <16 x float> %i, %j
- %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
- ret <16 x float> %r
-}
-
-define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
-; GENERIC-LABEL: test_mask_vminps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mask_vminps:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %cmp_res = fcmp olt <16 x float> %i, %j
- %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
- %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
- ret <16 x float> %r
-}
-
-define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone {
-; GENERIC-LABEL: test_mask_vminpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mask_vminpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <8 x i32> %mask1, zeroinitializer
- %cmp_res = fcmp olt <8 x double> %i, %j
- %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
- %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
- ret <8 x double> %r
-}
-
-define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
-; GENERIC-LABEL: test_mask_vmaxps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mask_vmaxps:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %cmp_res = fcmp ogt <16 x float> %i, %j
- %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
- %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
- ret <16 x float> %r
-}
-
-define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i32> %mask1) nounwind readnone {
-; GENERIC-LABEL: test_mask_vmaxpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mask_vmaxpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <8 x i32> %mask1, zeroinitializer
- %cmp_res = fcmp ogt <8 x double> %i, %j
- %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
- %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
- ret <8 x double> %r
-}
-
-define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
-; GENERIC-LABEL: test_mask_vsubps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mask_vsubps:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %x = fsub <16 x float> %i, %j
- %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
- ret <16 x float> %r
-}
-
-define <16 x float> @test_mask_vdivps(<16 x float> %dst, <16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone {
-; GENERIC-LABEL: test_mask_vdivps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [29:28.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mask_vdivps:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} # sched: [18:10.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %x = fdiv <16 x float> %i, %j
- %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
- ret <16 x float> %r
-}
-
-define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone {
-; GENERIC-LABEL: test_mask_vaddpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mask_vaddpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <8 x i64> %mask1, zeroinitializer
- %x = fadd <8 x double> %i, %j
- %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
- ret <8 x double> %r
-}
-
-define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, <8 x i64> %mask1) nounwind readnone {
-; GENERIC-LABEL: test_maskz_vaddpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_maskz_vaddpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <8 x i64> %mask1, zeroinitializer
- %x = fadd <8 x double> %i, %j
- %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
- ret <8 x double> %r
-}
-
-define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind {
-; GENERIC-LABEL: test_mask_fold_vaddpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mask_fold_vaddpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <8 x i64> %mask1, zeroinitializer
- %tmp = load <8 x double>, <8 x double>* %j, align 8
- %x = fadd <8 x double> %i, %tmp
- %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst
- ret <8 x double> %r
-}
-
-define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, <8 x double>* %j, <8 x i64> %mask1) nounwind {
-; GENERIC-LABEL: test_maskz_fold_vaddpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_maskz_fold_vaddpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <8 x i64> %mask1, zeroinitializer
- %tmp = load <8 x double>, <8 x double>* %j, align 8
- %x = fadd <8 x double> %i, %tmp
- %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
- ret <8 x double> %r
-}
-
-define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, double* %j) nounwind {
-; GENERIC-LABEL: test_broadcast_vaddpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_broadcast_vaddpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %tmp = load double, double* %j
- %b = insertelement <8 x double> undef, double %tmp, i32 0
- %c = shufflevector <8 x double> %b, <8 x double> undef,
- <8 x i32> zeroinitializer
- %x = fadd <8 x double> %c, %i
- ret <8 x double> %x
-}
-
-define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, double* %j, <8 x i64> %mask1) nounwind {
-; GENERIC-LABEL: test_mask_broadcast_vaddpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm0 {%k1} # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mask_broadcast_vaddpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm0 {%k1} # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <8 x i64> %mask1, zeroinitializer
- %tmp = load double, double* %j
- %b = insertelement <8 x double> undef, double %tmp, i32 0
- %c = shufflevector <8 x double> %b, <8 x double> undef,
- <8 x i32> zeroinitializer
- %x = fadd <8 x double> %c, %i
- %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %i
- ret <8 x double> %r
-}
-
-define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
-; GENERIC-LABEL: test_maskz_broadcast_vaddpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_maskz_broadcast_vaddpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- <8 x i64> %mask1) nounwind {
- %mask = icmp ne <8 x i64> %mask1, zeroinitializer
- %tmp = load double, double* %j
- %b = insertelement <8 x double> undef, double %tmp, i32 0
- %c = shufflevector <8 x double> %b, <8 x double> undef,
- <8 x i32> zeroinitializer
- %x = fadd <8 x double> %c, %i
- %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer
- ret <8 x double> %r
-}
-
-define <16 x float> @test_fxor(<16 x float> %a) {
-; GENERIC-LABEL: test_fxor:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_fxor:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-
- %res = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
- ret <16 x float>%res
-}
-
-define <8 x float> @test_fxor_8f32(<8 x float> %a) {
-; GENERIC-LABEL: test_fxor_8f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_fxor_8f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorps {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
- ret <8 x float>%res
-}
-
-define <8 x double> @fabs_v8f64(<8 x double> %p)
-; GENERIC-LABEL: fabs_v8f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: fabs_v8f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-{
- %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
- ret <8 x double> %t
-}
-declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
-
-define <16 x float> @fabs_v16f32(<16 x float> %p)
-; GENERIC-LABEL: fabs_v16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: fabs_v16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-{
- %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
- ret <16 x float> %t
-}
-declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
-
-define double @test1(double %a, double %b) nounwind {
-; GENERIC-LABEL: test1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: jne .LBB64_1 # sched: [1:1.00]
-; GENERIC-NEXT: jnp .LBB64_2 # sched: [1:1.00]
-; GENERIC-NEXT: .LBB64_1: # %l1
-; GENERIC-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-; GENERIC-NEXT: .LBB64_2: # %l2
-; GENERIC-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test1:
-; SKX: # %bb.0:
-; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: jne .LBB64_1 # sched: [1:0.50]
-; SKX-NEXT: jnp .LBB64_2 # sched: [1:0.50]
-; SKX-NEXT: .LBB64_1: # %l1
-; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-; SKX-NEXT: .LBB64_2: # %l2
-; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %tobool = fcmp une double %a, %b
- br i1 %tobool, label %l1, label %l2
-
-l1:
- %c = fsub double %a, %b
- ret double %c
-l2:
- %c1 = fadd double %a, %b
- ret double %c1
-}
-
-define float @test2(float %a, float %b) nounwind {
-; GENERIC-LABEL: test2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vucomiss %xmm0, %xmm1 # sched: [2:1.00]
-; GENERIC-NEXT: jbe .LBB65_2 # sched: [1:1.00]
-; GENERIC-NEXT: # %bb.1: # %l1
-; GENERIC-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-; GENERIC-NEXT: .LBB65_2: # %l2
-; GENERIC-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2:
-; SKX: # %bb.0:
-; SKX-NEXT: vucomiss %xmm0, %xmm1 # sched: [2:1.00]
-; SKX-NEXT: jbe .LBB65_2 # sched: [1:0.50]
-; SKX-NEXT: # %bb.1: # %l1
-; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-; SKX-NEXT: .LBB65_2: # %l2
-; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %tobool = fcmp olt float %a, %b
- br i1 %tobool, label %l1, label %l2
-
-l1:
- %c = fsub float %a, %b
- ret float %c
-l2:
- %c1 = fadd float %a, %b
- ret float %c1
-}
-
-define i32 @test3(float %a, float %b) {
-; GENERIC-LABEL: test3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test3:
-; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: kmovw %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-
- %cmp10.i = fcmp oeq float %a, %b
- %conv11.i = zext i1 %cmp10.i to i32
- ret i32 %conv11.i
-}
-
-define float @test5(float %p) #0 {
-; GENERIC-LABEL: test5:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: jne .LBB67_1 # sched: [1:1.00]
-; GENERIC-NEXT: jp .LBB67_1 # sched: [1:1.00]
-; GENERIC-NEXT: # %bb.2: # %return
-; GENERIC-NEXT: retq # sched: [1:1.00]
-; GENERIC-NEXT: .LBB67_1: # %if.end
-; GENERIC-NEXT: vcmpltss %xmm0, %xmm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
-; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
-; GENERIC-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test5:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: jne .LBB67_1 # sched: [1:0.50]
-; SKX-NEXT: jp .LBB67_1 # sched: [1:0.50]
-; SKX-NEXT: # %bb.2: # %return
-; SKX-NEXT: retq # sched: [7:1.00]
-; SKX-NEXT: .LBB67_1: # %if.end
-; SKX-NEXT: vcmpltss %xmm0, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %cmp = fcmp oeq float %p, 0.000000e+00
- br i1 %cmp, label %return, label %if.end
-
-if.end: ; preds = %entry
- %cmp1 = fcmp ogt float %p, 0.000000e+00
- %cond = select i1 %cmp1, float 1.000000e+00, float -1.000000e+00
- br label %return
-
-return: ; preds = %if.end, %entry
- %retval.0 = phi float [ %cond, %if.end ], [ %p, %entry ]
- ret float %retval.0
-}
-
-define i32 @test6(i32 %a, i32 %b) {
-; GENERIC-LABEL: test6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: xorl %eax, %eax # sched: [0:0.25]
-; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33]
-; GENERIC-NEXT: sete %al # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test6:
-; SKX: # %bb.0:
-; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25]
-; SKX-NEXT: sete %al # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %cmp = icmp eq i32 %a, %b
- %res = zext i1 %cmp to i32
- ret i32 %res
-}
-
-define i32 @test7(double %x, double %y) #2 {
-; GENERIC-LABEL: test7:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: xorl %eax, %eax # sched: [0:0.25]
-; GENERIC-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: setne %al # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test7:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: xorl %eax, %eax # sched: [1:0.25]
-; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: setne %al # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = fcmp one double %x, %y
- %or = zext i1 %0 to i32
- ret i32 %or
-}
-
-define i32 @test8(i32 %a1, i32 %a2, i32 %a3) {
-; GENERIC-LABEL: test8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: xorl $-2147483648, %esi # imm = 0x80000000
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: testl %edx, %edx # sched: [1:0.33]
-; GENERIC-NEXT: movl $1, %eax # sched: [1:0.33]
-; GENERIC-NEXT: cmovel %eax, %edx # sched: [2:0.67]
-; GENERIC-NEXT: notl %edi # sched: [1:0.33]
-; GENERIC-NEXT: orl %edi, %esi # sched: [1:0.33]
-; GENERIC-NEXT: cmovnel %edx, %eax # sched: [2:0.67]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test8:
-; SKX: # %bb.0:
-; SKX-NEXT: notl %edi # sched: [1:0.25]
-; SKX-NEXT: xorl $-2147483648, %esi # imm = 0x80000000
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: testl %edx, %edx # sched: [1:0.25]
-; SKX-NEXT: movl $1, %eax # sched: [1:0.25]
-; SKX-NEXT: cmovel %eax, %edx # sched: [1:0.50]
-; SKX-NEXT: orl %edi, %esi # sched: [1:0.25]
-; SKX-NEXT: cmovnel %edx, %eax # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %tmp1 = icmp eq i32 %a1, -1
- %tmp2 = icmp eq i32 %a2, -2147483648
- %tmp3 = and i1 %tmp1, %tmp2
- %tmp4 = icmp eq i32 %a3, 0
- %tmp5 = or i1 %tmp3, %tmp4
- %res = select i1 %tmp5, i32 1, i32 %a3
- ret i32 %res
-}
-
-define i32 @test9(i64 %a) {
-; GENERIC-LABEL: test9:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: testb $1, %dil # sched: [1:0.33]
-; GENERIC-NEXT: jne .LBB71_2 # sched: [1:1.00]
-; GENERIC-NEXT: # %bb.1: # %A
-; GENERIC-NEXT: movl $6, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-; GENERIC-NEXT: .LBB71_2: # %B
-; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test9:
-; SKX: # %bb.0:
-; SKX-NEXT: testb $1, %dil # sched: [1:0.25]
-; SKX-NEXT: jne .LBB71_2 # sched: [1:0.50]
-; SKX-NEXT: # %bb.1: # %A
-; SKX-NEXT: movl $6, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-; SKX-NEXT: .LBB71_2: # %B
-; SKX-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = and i64 %a, 1
- %cmp10.i = icmp eq i64 %b, 0
- br i1 %cmp10.i, label %A, label %B
-A:
- ret i32 6
-B:
- ret i32 7
-}
-
-define i32 @test10(i64 %b, i64 %c, i1 %d) {
-; GENERIC-LABEL: test10:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl %edx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: andb $1, %al # sched: [1:0.33]
-; GENERIC-NEXT: cmpq %rsi, %rdi # sched: [1:0.33]
-; GENERIC-NEXT: sete %cl # sched: [1:0.50]
-; GENERIC-NEXT: orb %dl, %cl # sched: [1:0.33]
-; GENERIC-NEXT: andb $1, %cl # sched: [1:0.33]
-; GENERIC-NEXT: cmpb %cl, %al # sched: [1:0.33]
-; GENERIC-NEXT: je .LBB72_1 # sched: [1:1.00]
-; GENERIC-NEXT: # %bb.2: # %if.end.i
-; GENERIC-NEXT: movl $6, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-; GENERIC-NEXT: .LBB72_1: # %if.then.i
-; GENERIC-NEXT: movl $5, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test10:
-; SKX: # %bb.0:
-; SKX-NEXT: movl %edx, %eax # sched: [1:0.25]
-; SKX-NEXT: andb $1, %al # sched: [1:0.25]
-; SKX-NEXT: cmpq %rsi, %rdi # sched: [1:0.25]
-; SKX-NEXT: sete %cl # sched: [1:0.50]
-; SKX-NEXT: orb %dl, %cl # sched: [1:0.25]
-; SKX-NEXT: andb $1, %cl # sched: [1:0.25]
-; SKX-NEXT: cmpb %cl, %al # sched: [1:0.25]
-; SKX-NEXT: je .LBB72_1 # sched: [1:0.50]
-; SKX-NEXT: # %bb.2: # %if.end.i
-; SKX-NEXT: movl $6, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-; SKX-NEXT: .LBB72_1: # %if.then.i
-; SKX-NEXT: movl $5, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-
- %cmp8.i = icmp eq i64 %b, %c
- %or1 = or i1 %d, %cmp8.i
- %xor1 = xor i1 %d, %or1
- br i1 %xor1, label %if.end.i, label %if.then.i
-
-if.then.i:
- ret i32 5
-
-if.end.i:
- ret i32 6
-}
-
-define <16 x float> @sitof32(<16 x i32> %a) nounwind {
-; GENERIC-LABEL: sitof32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sitof32:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <16 x i32> %a to <16 x float>
- ret <16 x float> %b
-}
-
-define <8 x double> @sltof864(<8 x i64> %a) {
-; GENERIC-LABEL: sltof864:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sltof864:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <8 x i64> %a to <8 x double>
- ret <8 x double> %b
-}
-
-define <4 x double> @slto4f64(<4 x i64> %a) {
-; GENERIC-LABEL: slto4f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: slto4f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <4 x i64> %a to <4 x double>
- ret <4 x double> %b
-}
-
-define <2 x double> @slto2f64(<2 x i64> %a) {
-; GENERIC-LABEL: slto2f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: slto2f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <2 x i64> %a to <2 x double>
- ret <2 x double> %b
-}
-
-define <2 x float> @sltof2f32(<2 x i64> %a) {
-; GENERIC-LABEL: sltof2f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtqq2ps %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sltof2f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtqq2ps %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <2 x i64> %a to <2 x float>
- ret <2 x float>%b
-}
-
-define <4 x float> @slto4f32_mem(<4 x i64>* %a) {
-; GENERIC-LABEL: slto4f32_mem:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtqq2psy (%rdi), %xmm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: slto4f32_mem:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtqq2psy (%rdi), %xmm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a1 = load <4 x i64>, <4 x i64>* %a, align 8
- %b = sitofp <4 x i64> %a1 to <4 x float>
- ret <4 x float>%b
-}
-
-define <4 x i64> @f64to4sl(<4 x double> %a) {
-; GENERIC-LABEL: f64to4sl:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f64to4sl:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = fptosi <4 x double> %a to <4 x i64>
- ret <4 x i64> %b
-}
-
-define <4 x i64> @f32to4sl(<4 x float> %a) {
-; GENERIC-LABEL: f32to4sl:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttps2qq %xmm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f32to4sl:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2qq %xmm0, %ymm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = fptosi <4 x float> %a to <4 x i64>
- ret <4 x i64> %b
-}
-
-define <4 x float> @slto4f32(<4 x i64> %a) {
-; GENERIC-LABEL: slto4f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtqq2ps %ymm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: slto4f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtqq2ps %ymm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <4 x i64> %a to <4 x float>
- ret <4 x float> %b
-}
-
-define <4 x float> @ulto4f32(<4 x i64> %a) {
-; GENERIC-LABEL: ulto4f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtuqq2ps %ymm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ulto4f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtuqq2ps %ymm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <4 x i64> %a to <4 x float>
- ret <4 x float> %b
-}
-
-define <8 x double> @ulto8f64(<8 x i64> %a) {
-; GENERIC-LABEL: ulto8f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ulto8f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <8 x i64> %a to <8 x double>
- ret <8 x double> %b
-}
-
-define <16 x double> @ulto16f64(<16 x i64> %a) {
-; GENERIC-LABEL: ulto16f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ulto16f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <16 x i64> %a to <16 x double>
- ret <16 x double> %b
-}
-
-define <16 x i32> @f64to16si(<16 x float> %a) nounwind {
-; GENERIC-LABEL: f64to16si:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f64to16si:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = fptosi <16 x float> %a to <16 x i32>
- ret <16 x i32> %b
-}
-
-define <16 x i32> @f32to16ui(<16 x float> %a) nounwind {
-; GENERIC-LABEL: f32to16ui:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f32to16ui:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = fptoui <16 x float> %a to <16 x i32>
- ret <16 x i32> %b
-}
-
-define <16 x i8> @f32to16uc(<16 x float> %f) {
-; GENERIC-LABEL: f32to16uc:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovdb %zmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f32to16uc:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = fptoui <16 x float> %f to <16 x i8>
- ret <16 x i8> %res
-}
-
-define <16 x i16> @f32to16us(<16 x float> %f) {
-; GENERIC-LABEL: f32to16us:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovdw %zmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f32to16us:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: vpmovdw %zmm0, %ymm0 # sched: [4:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = fptoui <16 x float> %f to <16 x i16>
- ret <16 x i16> %res
-}
-
-define <8 x i32> @f32to8ui(<8 x float> %a) nounwind {
-; GENERIC-LABEL: f32to8ui:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f32to8ui:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = fptoui <8 x float> %a to <8 x i32>
- ret <8 x i32> %b
-}
-
-define <4 x i32> @f32to4ui(<4 x float> %a) nounwind {
-; GENERIC-LABEL: f32to4ui:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f32to4ui:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = fptoui <4 x float> %a to <4 x i32>
- ret <4 x i32> %b
-}
-
-define <8 x i32> @f64to8ui(<8 x double> %a) nounwind {
-; GENERIC-LABEL: f64to8ui:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttpd2udq %zmm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f64to8ui:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttpd2udq %zmm0, %ymm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = fptoui <8 x double> %a to <8 x i32>
- ret <8 x i32> %b
-}
-
-define <8 x i16> @f64to8us(<8 x double> %f) {
-; GENERIC-LABEL: f64to8us:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f64to8us:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
-; SKX-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = fptoui <8 x double> %f to <8 x i16>
- ret <8 x i16> %res
-}
-
-define <8 x i8> @f64to8uc(<8 x double> %f) {
-; GENERIC-LABEL: f64to8uc:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f64to8uc:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
-; SKX-NEXT: vpmovdw %ymm0, %xmm0 # sched: [4:2.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = fptoui <8 x double> %f to <8 x i8>
- ret <8 x i8> %res
-}
-
-define <4 x i32> @f64to4ui(<4 x double> %a) nounwind {
-; GENERIC-LABEL: f64to4ui:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttpd2udq %ymm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f64to4ui:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttpd2udq %ymm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = fptoui <4 x double> %a to <4 x i32>
- ret <4 x i32> %b
-}
-
-define <8 x double> @sito8f64(<8 x i32> %a) {
-; GENERIC-LABEL: sito8f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sito8f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <8 x i32> %a to <8 x double>
- ret <8 x double> %b
-}
-define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
-; GENERIC-LABEL: i32to8f64_mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: i32to8f64_mask:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-; VLNOBW-LABEL: i32to8f64_mask:
-; VLNOBW: # %bb.0:
-; VLNOBW-NEXT: kmovw %edi, %k1
-; VLNOBW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
-; VLNOBW-NEXT: ret{{[l|q]}}
- %1 = bitcast i8 %c to <8 x i1>
- %2 = sitofp <8 x i32> %b to <8 x double>
- %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
- ret <8 x double> %3
-}
-define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
-; GENERIC-LABEL: sito8f64_maskz:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sito8f64_maskz:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-; VLNOBW-LABEL: sito8f64_maskz:
-; VLNOBW: # %bb.0:
-; VLNOBW-NEXT: kmovw %edi, %k1
-; VLNOBW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
-; VLNOBW-NEXT: ret{{[l|q]}}
- %1 = bitcast i8 %b to <8 x i1>
- %2 = sitofp <8 x i32> %a to <8 x double>
- %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
- ret <8 x double> %3
-}
-
-define <8 x i32> @f64to8si(<8 x double> %a) {
-; GENERIC-LABEL: f64to8si:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f64to8si:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = fptosi <8 x double> %a to <8 x i32>
- ret <8 x i32> %b
-}
-
-define <4 x i32> @f64to4si(<4 x double> %a) {
-; GENERIC-LABEL: f64to4si:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f64to4si:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = fptosi <4 x double> %a to <4 x i32>
- ret <4 x i32> %b
-}
-
-define <16 x float> @f64to16f32(<16 x double> %b) nounwind {
-; GENERIC-LABEL: f64to16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [4:1.00]
-; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f64to16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00]
-; SKX-NEXT: vcvtpd2ps %zmm1, %ymm1 # sched: [7:1.00]
-; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = fptrunc <16 x double> %b to <16 x float>
- ret <16 x float> %a
-}
-
-define <4 x float> @f64to4f32(<4 x double> %b) {
-; GENERIC-LABEL: f64to4f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f64to4f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = fptrunc <4 x double> %b to <4 x float>
- ret <4 x float> %a
-}
-
-define <4 x float> @f64to4f32_mask(<4 x double> %b, <4 x i1> %mask) {
-; GENERIC-LABEL: f64to4f32_mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [4:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f64to4f32_mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:1.00]
-; SKX-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z} # sched: [7:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = fptrunc <4 x double> %b to <4 x float>
- %c = select <4 x i1>%mask, <4 x float>%a, <4 x float> zeroinitializer
- ret <4 x float> %c
-}
-
-define <4 x float> @f64tof32_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
-; GENERIC-LABEL: f64tof32_inreg:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f64tof32_inreg:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %ext = extractelement <2 x double> %a0, i32 0
- %cvt = fptrunc double %ext to float
- %res = insertelement <4 x float> %a1, float %cvt, i32 0
- ret <4 x float> %res
-}
-
-define <8 x double> @f32to8f64(<8 x float> %b) nounwind {
-; GENERIC-LABEL: f32to8f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f32to8f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = fpext <8 x float> %b to <8 x double>
- ret <8 x double> %a
-}
-
-define <4 x double> @f32to4f64_mask(<4 x float> %b, <4 x double> %b1, <4 x double> %a1) {
-; GENERIC-LABEL: f32to4f64_mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f32to4f64_mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vcmpltpd %ymm2, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z} # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = fpext <4 x float> %b to <4 x double>
- %mask = fcmp ogt <4 x double> %a1, %b1
- %c = select <4 x i1> %mask, <4 x double> %a, <4 x double> zeroinitializer
- ret <4 x double> %c
-}
-
-define <2 x double> @f32tof64_inreg(<2 x double> %a0, <4 x float> %a1) nounwind {
-; GENERIC-LABEL: f32tof64_inreg:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f32tof64_inreg:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %ext = extractelement <4 x float> %a1, i32 0
- %cvt = fpext float %ext to double
- %res = insertelement <2 x double> %a0, double %cvt, i32 0
- ret <2 x double> %res
-}
-
-define double @sltof64_load(i64* nocapture %e) {
-; GENERIC-LABEL: sltof64_load:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sltof64_load:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %tmp1 = load i64, i64* %e, align 8
- %conv = sitofp i64 %tmp1 to double
- ret double %conv
-}
-
-define double @sitof64_load(i32* %e) {
-; GENERIC-LABEL: sitof64_load:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sitof64_load:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %tmp1 = load i32, i32* %e, align 4
- %conv = sitofp i32 %tmp1 to double
- ret double %conv
-}
-
-define float @sitof32_load(i32* %e) {
-; GENERIC-LABEL: sitof32_load:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sitof32_load:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %tmp1 = load i32, i32* %e, align 4
- %conv = sitofp i32 %tmp1 to float
- ret float %conv
-}
-
-define float @sltof32_load(i64* %e) {
-; GENERIC-LABEL: sltof32_load:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sltof32_load:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %tmp1 = load i64, i64* %e, align 8
- %conv = sitofp i64 %tmp1 to float
- ret float %conv
-}
-
-define void @f32tof64_loadstore() {
-; GENERIC-LABEL: f32tof64_loadstore:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
-; GENERIC-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f32tof64_loadstore:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %f = alloca float, align 4
- %d = alloca double, align 8
- %tmp = load float, float* %f, align 4
- %conv = fpext float %tmp to double
- store double %conv, double* %d, align 8
- ret void
-}
-
-define void @f64tof32_loadstore() nounwind uwtable {
-; GENERIC-LABEL: f64tof32_loadstore:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
-; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: f64tof32_loadstore:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %f = alloca float, align 4
- %d = alloca double, align 8
- %tmp = load double, double* %d, align 8
- %conv = fptrunc double %tmp to float
- store float %conv, float* %f, align 4
- ret void
-}
-
-define double @long_to_double(i64 %x) {
-; GENERIC-LABEL: long_to_double:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: long_to_double:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = bitcast i64 %x to double
- ret double %res
-}
-
-define i64 @double_to_long(double %x) {
-; GENERIC-LABEL: double_to_long:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovq %xmm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: double_to_long:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = bitcast double %x to i64
- ret i64 %res
-}
-
-define float @int_to_float(i32 %x) {
-; GENERIC-LABEL: int_to_float:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: int_to_float:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = bitcast i32 %x to float
- ret float %res
-}
-
-define i32 @float_to_int(float %x) {
-; GENERIC-LABEL: float_to_int:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: float_to_int:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = bitcast float %x to i32
- ret i32 %res
-}
-
-define <16 x double> @uito16f64(<16 x i32> %a) nounwind {
-; GENERIC-LABEL: uito16f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm2 # sched: [4:1.00]
-; GENERIC-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm1 # sched: [4:1.00]
-; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: uito16f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtudq2pd %ymm0, %zmm2 # sched: [7:1.00]
-; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vcvtudq2pd %ymm0, %zmm1 # sched: [7:1.00]
-; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <16 x i32> %a to <16 x double>
- ret <16 x double> %b
-}
-
-define <8 x float> @slto8f32(<8 x i64> %a) {
-; GENERIC-LABEL: slto8f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: slto8f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <8 x i64> %a to <8 x float>
- ret <8 x float> %b
-}
-
-define <16 x float> @slto16f32(<16 x i64> %a) {
-; GENERIC-LABEL: slto16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vcvtqq2ps %zmm1, %ymm1 # sched: [3:1.00]
-; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: slto16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtqq2ps %zmm0, %ymm0 # sched: [7:1.00]
-; SKX-NEXT: vcvtqq2ps %zmm1, %ymm1 # sched: [7:1.00]
-; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <16 x i64> %a to <16 x float>
- ret <16 x float> %b
-}
-
-define <8 x double> @slto8f64(<8 x i64> %a) {
-; GENERIC-LABEL: slto8f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: slto8f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <8 x i64> %a to <8 x double>
- ret <8 x double> %b
-}
-
-define <16 x double> @slto16f64(<16 x i64> %a) {
-; GENERIC-LABEL: slto16f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: slto16f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <16 x i64> %a to <16 x double>
- ret <16 x double> %b
-}
-
-define <8 x float> @ulto8f32(<8 x i64> %a) {
-; GENERIC-LABEL: ulto8f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ulto8f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <8 x i64> %a to <8 x float>
- ret <8 x float> %b
-}
-
-define <16 x float> @ulto16f32(<16 x i64> %a) {
-; GENERIC-LABEL: ulto16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vcvtuqq2ps %zmm1, %ymm1 # sched: [3:1.00]
-; GENERIC-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ulto16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtuqq2ps %zmm0, %ymm0 # sched: [7:1.00]
-; SKX-NEXT: vcvtuqq2ps %zmm1, %ymm1 # sched: [7:1.00]
-; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <16 x i64> %a to <16 x float>
- ret <16 x float> %b
-}
-
-define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
-; GENERIC-LABEL: uito8f64_mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: uito8f64_mask:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-; VLNOBW-LABEL: uito8f64_mask:
-; VLNOBW: # %bb.0:
-; VLNOBW-NEXT: kmovw %edi, %k1
-; VLNOBW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
-; VLNOBW-NEXT: ret{{[l|q]}}
- %1 = bitcast i8 %c to <8 x i1>
- %2 = uitofp <8 x i32> %b to <8 x double>
- %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> %a
- ret <8 x double> %3
-}
-define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind {
-; GENERIC-LABEL: uito8f64_maskz:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: uito8f64_maskz:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %1 = bitcast i8 %b to <8 x i1>
- %2 = uitofp <8 x i32> %a to <8 x double>
- %3 = select <8 x i1> %1, <8 x double> %2, <8 x double> zeroinitializer
- ret <8 x double> %3
-}
-
-define <4 x double> @uito4f64(<4 x i32> %a) nounwind {
-; GENERIC-LABEL: uito4f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtudq2pd %xmm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: uito4f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtudq2pd %xmm0, %ymm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <4 x i32> %a to <4 x double>
- ret <4 x double> %b
-}
-
-define <16 x float> @uito16f32(<16 x i32> %a) nounwind {
-; GENERIC-LABEL: uito16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: uito16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <16 x i32> %a to <16 x float>
- ret <16 x float> %b
-}
-
-define <8 x double> @uito8f64(<8 x i32> %a) {
-; GENERIC-LABEL: uito8f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: uito8f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <8 x i32> %a to <8 x double>
- ret <8 x double> %b
-}
-
-define <8 x float> @uito8f32(<8 x i32> %a) nounwind {
-; GENERIC-LABEL: uito8f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: uito8f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <8 x i32> %a to <8 x float>
- ret <8 x float> %b
-}
-
-define <4 x float> @uito4f32(<4 x i32> %a) nounwind {
-; GENERIC-LABEL: uito4f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: uito4f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <4 x i32> %a to <4 x float>
- ret <4 x float> %b
-}
-
-define i32 @fptosi(float %a) nounwind {
-; GENERIC-LABEL: fptosi:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttss2si %xmm0, %eax # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: fptosi:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttss2si %xmm0, %eax # sched: [6:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = fptosi float %a to i32
- ret i32 %b
-}
-
-define i32 @fptoui(float %a) nounwind {
-; GENERIC-LABEL: fptoui:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvttss2usi %xmm0, %eax # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: fptoui:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttss2usi %xmm0, %eax # sched: [6:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = fptoui float %a to i32
- ret i32 %b
-}
-
-define float @uitof32(i32 %a) nounwind {
-; GENERIC-LABEL: uitof32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: uitof32:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp i32 %a to float
- ret float %b
-}
-
-define double @uitof64(i32 %a) nounwind {
-; GENERIC-LABEL: uitof64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: uitof64:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp i32 %a to double
- ret double %b
-}
-
-define <16 x float> @sbto16f32(<16 x i32> %a) {
-; GENERIC-LABEL: sbto16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sbto16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp slt <16 x i32> %a, zeroinitializer
- %1 = sitofp <16 x i1> %mask to <16 x float>
- ret <16 x float> %1
-}
-
-define <16 x float> @scto16f32(<16 x i8> %a) {
-; GENERIC-LABEL: scto16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: scto16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %1 = sitofp <16 x i8> %a to <16 x float>
- ret <16 x float> %1
-}
-
-define <16 x float> @ssto16f32(<16 x i16> %a) {
-; GENERIC-LABEL: ssto16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ssto16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %1 = sitofp <16 x i16> %a to <16 x float>
- ret <16 x float> %1
-}
-
-define <8 x double> @ssto16f64(<8 x i16> %a) {
-; GENERIC-LABEL: ssto16f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ssto16f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %1 = sitofp <8 x i16> %a to <8 x double>
- ret <8 x double> %1
-}
-
-define <8 x double> @scto8f64(<8 x i8> %a) {
-; GENERIC-LABEL: scto8f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; GENERIC-NEXT: vpslld $24, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsrad $24, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: scto8f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
-; SKX-NEXT: vpslld $24, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpsrad $24, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %1 = sitofp <8 x i8> %a to <8 x double>
- ret <8 x double> %1
-}
-
-define <16 x double> @scto16f64(<16 x i8> %a) {
-; GENERIC-LABEL: scto16f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: scto16f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbd %xmm0, %zmm1 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <16 x i8> %a to <16 x double>
- ret <16 x double> %b
-}
-
-define <16 x double> @sbto16f64(<16 x double> %a) {
-; GENERIC-LABEL: sbto16f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [0:0.25]
-; GENERIC-NEXT: vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: kunpckbw %k0, %k1, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %zmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sbto16f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k0 # sched: [3:1.00]
-; SKX-NEXT: vcmpltpd %zmm1, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: kunpckbw %k0, %k1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %zmm1 # sched: [1:0.25]
-; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %cmpres = fcmp ogt <16 x double> %a, zeroinitializer
- %1 = sitofp <16 x i1> %cmpres to <16 x double>
- ret <16 x double> %1
-}
-
-define <8 x double> @sbto8f64(<8 x double> %a) {
-; GENERIC-LABEL: sbto8f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.33]
-; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sbto8f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %ymm0 # sched: [1:0.25]
-; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %cmpres = fcmp ogt <8 x double> %a, zeroinitializer
- %1 = sitofp <8 x i1> %cmpres to <8 x double>
- ret <8 x double> %1
-}
-
-define <8 x float> @sbto8f32(<8 x float> %a) {
-; GENERIC-LABEL: sbto8f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sbto8f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %cmpres = fcmp ogt <8 x float> %a, zeroinitializer
- %1 = sitofp <8 x i1> %cmpres to <8 x float>
- ret <8 x float> %1
-}
-
-define <4 x float> @sbto4f32(<4 x float> %a) {
-; GENERIC-LABEL: sbto4f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sbto4f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %cmpres = fcmp ogt <4 x float> %a, zeroinitializer
- %1 = sitofp <4 x i1> %cmpres to <4 x float>
- ret <4 x float> %1
-}
-
-define <4 x double> @sbto4f64(<4 x double> %a) {
-; GENERIC-LABEL: sbto4f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sbto4f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpltpd %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %cmpres = fcmp ogt <4 x double> %a, zeroinitializer
- %1 = sitofp <4 x i1> %cmpres to <4 x double>
- ret <4 x double> %1
-}
-
-define <2 x float> @sbto2f32(<2 x float> %a) {
-; GENERIC-LABEL: sbto2f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sbto2f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %cmpres = fcmp ogt <2 x float> %a, zeroinitializer
- %1 = sitofp <2 x i1> %cmpres to <2 x float>
- ret <2 x float> %1
-}
-
-define <2 x double> @sbto2f64(<2 x double> %a) {
-; GENERIC-LABEL: sbto2f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sbto2f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
-; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %cmpres = fcmp ogt <2 x double> %a, zeroinitializer
- %1 = sitofp <2 x i1> %cmpres to <2 x double>
- ret <2 x double> %1
-}
-
-define <16 x float> @ucto16f32(<16 x i8> %a) {
-; GENERIC-LABEL: ucto16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ucto16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <16 x i8> %a to <16 x float>
- ret <16 x float>%b
-}
-
-define <8 x double> @ucto8f64(<8 x i8> %a) {
-; GENERIC-LABEL: ucto8f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ucto8f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <8 x i8> %a to <8 x double>
- ret <8 x double> %b
-}
-
-define <16 x float> @swto16f32(<16 x i16> %a) {
-; GENERIC-LABEL: swto16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: swto16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <16 x i16> %a to <16 x float>
- ret <16 x float> %b
-}
-
-define <8 x double> @swto8f64(<8 x i16> %a) {
-; GENERIC-LABEL: swto8f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: swto8f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxwd %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <8 x i16> %a to <8 x double>
- ret <8 x double> %b
-}
-
-define <16 x double> @swto16f64(<16 x i16> %a) {
-; GENERIC-LABEL: swto16f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: swto16f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxwd %ymm0, %zmm1 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <16 x i16> %a to <16 x double>
- ret <16 x double> %b
-}
-
-define <16 x double> @ucto16f64(<16 x i8> %a) {
-; GENERIC-LABEL: ucto16f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ucto16f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <16 x i8> %a to <16 x double>
- ret <16 x double> %b
-}
-
-define <16 x float> @uwto16f32(<16 x i16> %a) {
-; GENERIC-LABEL: uwto16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: uwto16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <16 x i16> %a to <16 x float>
- ret <16 x float> %b
-}
-
-define <8 x double> @uwto8f64(<8 x i16> %a) {
-; GENERIC-LABEL: uwto8f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: uwto8f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <8 x i16> %a to <8 x double>
- ret <8 x double> %b
-}
-
-define <16 x double> @uwto16f64(<16 x i16> %a) {
-; GENERIC-LABEL: uwto16f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: uwto16f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <16 x i16> %a to <16 x double>
- ret <16 x double> %b
-}
-
-define <16 x float> @sito16f32(<16 x i32> %a) {
-; GENERIC-LABEL: sito16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sito16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <16 x i32> %a to <16 x float>
- ret <16 x float> %b
-}
-
-define <16 x double> @sito16f64(<16 x i32> %a) {
-; GENERIC-LABEL: sito16f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm2 # sched: [4:1.00]
-; GENERIC-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm1 # sched: [4:1.00]
-; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sito16f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtdq2pd %ymm0, %zmm2 # sched: [7:1.00]
-; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm0, %zmm1 # sched: [7:1.00]
-; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = sitofp <16 x i32> %a to <16 x double>
- ret <16 x double> %b
-}
-
-define <16 x float> @usto16f32(<16 x i16> %a) {
-; GENERIC-LABEL: usto16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: usto16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = uitofp <16 x i16> %a to <16 x float>
- ret <16 x float> %b
-}
-
-define <16 x float> @ubto16f32(<16 x i32> %a) {
-; GENERIC-LABEL: ubto16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ubto16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00]
-; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp slt <16 x i32> %a, zeroinitializer
- %1 = uitofp <16 x i1> %mask to <16 x float>
- ret <16 x float> %1
-}
-
-define <16 x double> @ubto16f64(<16 x i32> %a) {
-; GENERIC-LABEL: ubto16f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ubto16f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00]
-; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp slt <16 x i32> %a, zeroinitializer
- %1 = uitofp <16 x i1> %mask to <16 x double>
- ret <16 x double> %1
-}
-
-define <8 x float> @ubto8f32(<8 x i32> %a) {
-; GENERIC-LABEL: ubto8f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ubto8f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp slt <8 x i32> %a, zeroinitializer
- %1 = uitofp <8 x i1> %mask to <8 x float>
- ret <8 x float> %1
-}
-
-define <8 x double> @ubto8f64(<8 x i32> %a) {
-; GENERIC-LABEL: ubto8f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ubto8f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpsrld $31, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp slt <8 x i32> %a, zeroinitializer
- %1 = uitofp <8 x i1> %mask to <8 x double>
- ret <8 x double> %1
-}
-
-define <4 x float> @ubto4f32(<4 x i32> %a) {
-; GENERIC-LABEL: ubto4f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ubto4f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp slt <4 x i32> %a, zeroinitializer
- %1 = uitofp <4 x i1> %mask to <4 x float>
- ret <4 x float> %1
-}
-
-define <4 x double> @ubto4f64(<4 x i32> %a) {
-; GENERIC-LABEL: ubto4f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ubto4f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp slt <4 x i32> %a, zeroinitializer
- %1 = uitofp <4 x i1> %mask to <4 x double>
- ret <4 x double> %1
-}
-
-define <2 x float> @ubto2f32(<2 x i32> %a) {
-; GENERIC-LABEL: ubto2f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
-; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ubto2f32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
-; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <2 x i32> %a, zeroinitializer
- %1 = uitofp <2 x i1> %mask to <2 x float>
- ret <2 x float> %1
-}
-
-define <2 x double> @ubto2f64(<2 x i32> %a) {
-; GENERIC-LABEL: ubto2f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.50]
-; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:0.50]
-; GENERIC-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ubto2f64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] sched: [1:0.33]
-; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00]
-; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <2 x i32> %a, zeroinitializer
- %1 = uitofp <2 x i1> %mask to <2 x double>
- ret <2 x double> %1
-}
-
-define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_8x8mem_to_8x16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_8x8mem_to_8x16:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <8 x i8>,<8 x i8> *%i,align 1
- %x = zext <8 x i8> %a to <8 x i16>
- %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
- ret <8 x i16> %ret
-}
-
-define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_8x8mem_to_8x16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_8x8mem_to_8x16:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <8 x i8>,<8 x i8> *%i,align 1
- %x = sext <8 x i8> %a to <8 x i16>
- %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
- ret <8 x i16> %ret
-}
-
-
-define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_16x8mem_to_16x16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_16x8mem_to_16x16:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <16 x i8>,<16 x i8> *%i,align 1
- %x = zext <16 x i8> %a to <16 x i16>
- %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
- ret <16 x i16> %ret
-}
-
-define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_16x8mem_to_16x16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_16x8mem_to_16x16:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <16 x i8>,<16 x i8> *%i,align 1
- %x = sext <16 x i8> %a to <16 x i16>
- %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
- ret <16 x i16> %ret
-}
-
-define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
-; GENERIC-LABEL: zext_16x8_to_16x16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_16x8_to_16x16:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = zext <16 x i8> %a to <16 x i16>
- ret <16 x i16> %x
-}
-
-define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_16x8_to_16x16_mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_16x8_to_16x16_mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = zext <16 x i8> %a to <16 x i16>
- %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
- ret <16 x i16> %ret
-}
-
-define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
-; GENERIC-LABEL: sext_16x8_to_16x16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_16x8_to_16x16:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = sext <16 x i8> %a to <16 x i16>
- ret <16 x i16> %x
-}
-
-define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_16x8_to_16x16_mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_16x8_to_16x16_mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = sext <16 x i8> %a to <16 x i16>
- %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
- ret <16 x i16> %ret
-}
-
-define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_32x8mem_to_32x16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_32x8mem_to_32x16:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <32 x i8>,<32 x i8> *%i,align 1
- %x = zext <32 x i8> %a to <32 x i16>
- %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
- ret <32 x i16> %ret
-}
-
-define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_32x8mem_to_32x16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_32x8mem_to_32x16:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %ymm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <32 x i8>,<32 x i8> *%i,align 1
- %x = sext <32 x i8> %a to <32 x i16>
- %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
- ret <32 x i16> %ret
-}
-
-define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
-; GENERIC-LABEL: zext_32x8_to_32x16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_32x8_to_32x16:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = zext <32 x i8> %a to <32 x i16>
- ret <32 x i16> %x
-}
-
-define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_32x8_to_32x16_mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_32x8_to_32x16_mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = zext <32 x i8> %a to <32 x i16>
- %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
- ret <32 x i16> %ret
-}
-
-define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
-; GENERIC-LABEL: sext_32x8_to_32x16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_32x8_to_32x16:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = sext <32 x i8> %a to <32 x i16>
- ret <32 x i16> %x
-}
-
-define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_32x8_to_32x16_mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_32x8_to_32x16_mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = sext <32 x i8> %a to <32 x i16>
- %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
- ret <32 x i16> %ret
-}
-
-define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_4x8mem_to_4x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_4x8mem_to_4x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <4 x i8>,<4 x i8> *%i,align 1
- %x = zext <4 x i8> %a to <4 x i32>
- %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
- ret <4 x i32> %ret
-}
-
-define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_4x8mem_to_4x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_4x8mem_to_4x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <4 x i8>,<4 x i8> *%i,align 1
- %x = sext <4 x i8> %a to <4 x i32>
- %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
- ret <4 x i32> %ret
-}
-
-define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_8x8mem_to_8x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_8x8mem_to_8x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <8 x i8>,<8 x i8> *%i,align 1
- %x = zext <8 x i8> %a to <8 x i32>
- %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
- ret <8 x i32> %ret
-}
-
-define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_8x8mem_to_8x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_8x8mem_to_8x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <8 x i8>,<8 x i8> *%i,align 1
- %x = sext <8 x i8> %a to <8 x i32>
- %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
- ret <8 x i32> %ret
-}
-
-define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_16x8mem_to_16x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_16x8mem_to_16x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <16 x i8>,<16 x i8> *%i,align 1
- %x = zext <16 x i8> %a to <16 x i32>
- %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %ret
-}
-
-define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_16x8mem_to_16x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_16x8mem_to_16x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <16 x i8>,<16 x i8> *%i,align 1
- %x = sext <16 x i8> %a to <16 x i32>
- %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %ret
-}
-
-define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_16x8_to_16x32_mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_16x8_to_16x32_mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = zext <16 x i8> %a to <16 x i32>
- %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %ret
-}
-
-define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_16x8_to_16x32_mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_16x8_to_16x32_mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = sext <16 x i8> %a to <16 x i32>
- %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %ret
-}
-
-define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
-; GENERIC-LABEL: zext_16x8_to_16x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_16x8_to_16x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = zext <16 x i8> %i to <16 x i32>
- ret <16 x i32> %x
-}
-
-define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
-; GENERIC-LABEL: sext_16x8_to_16x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_16x8_to_16x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = sext <16 x i8> %i to <16 x i32>
- ret <16 x i32> %x
-}
-
-define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_2x8mem_to_2x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_2x8mem_to_2x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <2 x i8>,<2 x i8> *%i,align 1
- %x = zext <2 x i8> %a to <2 x i64>
- %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
- ret <2 x i64> %ret
-}
-define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_2x8mem_to_2x64mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_2x8mem_to_2x64mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <2 x i8>,<2 x i8> *%i,align 1
- %x = sext <2 x i8> %a to <2 x i64>
- %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
- ret <2 x i64> %ret
-}
-define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone {
-; GENERIC-LABEL: sext_2x8mem_to_2x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_2x8mem_to_2x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <2 x i8>,<2 x i8> *%i,align 1
- %x = sext <2 x i8> %a to <2 x i64>
- ret <2 x i64> %x
-}
-
-define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_4x8mem_to_4x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_4x8mem_to_4x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <4 x i8>,<4 x i8> *%i,align 1
- %x = zext <4 x i8> %a to <4 x i64>
- %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
- ret <4 x i64> %ret
-}
-
-define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_4x8mem_to_4x64mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_4x8mem_to_4x64mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <4 x i8>,<4 x i8> *%i,align 1
- %x = sext <4 x i8> %a to <4 x i64>
- %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
- ret <4 x i64> %ret
-}
-
-define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
-; GENERIC-LABEL: sext_4x8mem_to_4x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_4x8mem_to_4x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbq (%rdi), %ymm0 # sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <4 x i8>,<4 x i8> *%i,align 1
- %x = sext <4 x i8> %a to <4 x i64>
- ret <4 x i64> %x
-}
-
-define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_8x8mem_to_8x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_8x8mem_to_8x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <8 x i8>,<8 x i8> *%i,align 1
- %x = zext <8 x i8> %a to <8 x i64>
- %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
- ret <8 x i64> %ret
-}
-
-define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_8x8mem_to_8x64mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_8x8mem_to_8x64mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <8 x i8>,<8 x i8> *%i,align 1
- %x = sext <8 x i8> %a to <8 x i64>
- %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
- ret <8 x i64> %ret
-}
-
-define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
-; GENERIC-LABEL: sext_8x8mem_to_8x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_8x8mem_to_8x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <8 x i8>,<8 x i8> *%i,align 1
- %x = sext <8 x i8> %a to <8 x i64>
- ret <8 x i64> %x
-}
-
-define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_4x16mem_to_4x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_4x16mem_to_4x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <4 x i16>,<4 x i16> *%i,align 1
- %x = zext <4 x i16> %a to <4 x i32>
- %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
- ret <4 x i32> %ret
-}
-
-define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_4x16mem_to_4x32mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_4x16mem_to_4x32mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <4 x i16>,<4 x i16> *%i,align 1
- %x = sext <4 x i16> %a to <4 x i32>
- %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
- ret <4 x i32> %ret
-}
-
-define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
-; GENERIC-LABEL: sext_4x16mem_to_4x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_4x16mem_to_4x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <4 x i16>,<4 x i16> *%i,align 1
- %x = sext <4 x i16> %a to <4 x i32>
- ret <4 x i32> %x
-}
-
-
-define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_8x16mem_to_8x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_8x16mem_to_8x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <8 x i16>,<8 x i16> *%i,align 1
- %x = zext <8 x i16> %a to <8 x i32>
- %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
- ret <8 x i32> %ret
-}
-
-define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_8x16mem_to_8x32mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_8x16mem_to_8x32mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <8 x i16>,<8 x i16> *%i,align 1
- %x = sext <8 x i16> %a to <8 x i32>
- %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
- ret <8 x i32> %ret
-}
-
-define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
-; GENERIC-LABEL: sext_8x16mem_to_8x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_8x16mem_to_8x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxwd (%rdi), %ymm0 # sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <8 x i16>,<8 x i16> *%i,align 1
- %x = sext <8 x i16> %a to <8 x i32>
- ret <8 x i32> %x
-}
-
-define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_8x16_to_8x32mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_8x16_to_8x32mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = zext <8 x i16> %a to <8 x i32>
- %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
- ret <8 x i32> %ret
-}
-
-define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
-; GENERIC-LABEL: zext_8x16_to_8x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_8x16_to_8x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = zext <8 x i16> %a to <8 x i32>
- ret <8 x i32> %x
-}
-
-define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_16x16mem_to_16x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_16x16mem_to_16x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <16 x i16>,<16 x i16> *%i,align 1
- %x = zext <16 x i16> %a to <16 x i32>
- %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %ret
-}
-
-define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_16x16mem_to_16x32mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_16x16mem_to_16x32mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <16 x i16>,<16 x i16> *%i,align 1
- %x = sext <16 x i16> %a to <16 x i32>
- %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %ret
-}
-
-define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
-; GENERIC-LABEL: sext_16x16mem_to_16x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_16x16mem_to_16x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <16 x i16>,<16 x i16> *%i,align 1
- %x = sext <16 x i16> %a to <16 x i32>
- ret <16 x i32> %x
-}
-define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_16x16_to_16x32mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_16x16_to_16x32mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = zext <16 x i16> %a to <16 x i32>
- %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
- ret <16 x i32> %ret
-}
-
-define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
-; GENERIC-LABEL: zext_16x16_to_16x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_16x16_to_16x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = zext <16 x i16> %a to <16 x i32>
- ret <16 x i32> %x
-}
-
-define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_2x16mem_to_2x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_2x16mem_to_2x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <2 x i16>,<2 x i16> *%i,align 1
- %x = zext <2 x i16> %a to <2 x i64>
- %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
- ret <2 x i64> %ret
-}
-
-define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_2x16mem_to_2x64mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_2x16mem_to_2x64mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <2 x i16>,<2 x i16> *%i,align 1
- %x = sext <2 x i16> %a to <2 x i64>
- %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
- ret <2 x i64> %ret
-}
-
-define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone {
-; GENERIC-LABEL: sext_2x16mem_to_2x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_2x16mem_to_2x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <2 x i16>,<2 x i16> *%i,align 1
- %x = sext <2 x i16> %a to <2 x i64>
- ret <2 x i64> %x
-}
-
-define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_4x16mem_to_4x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_4x16mem_to_4x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <4 x i16>,<4 x i16> *%i,align 1
- %x = zext <4 x i16> %a to <4 x i64>
- %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
- ret <4 x i64> %ret
-}
-
-define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_4x16mem_to_4x64mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_4x16mem_to_4x64mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <4 x i16>,<4 x i16> *%i,align 1
- %x = sext <4 x i16> %a to <4 x i64>
- %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
- ret <4 x i64> %ret
-}
-
-define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
-; GENERIC-LABEL: sext_4x16mem_to_4x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_4x16mem_to_4x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxwq (%rdi), %ymm0 # sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <4 x i16>,<4 x i16> *%i,align 1
- %x = sext <4 x i16> %a to <4 x i64>
- ret <4 x i64> %x
-}
-
-define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_8x16mem_to_8x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_8x16mem_to_8x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <8 x i16>,<8 x i16> *%i,align 1
- %x = zext <8 x i16> %a to <8 x i64>
- %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
- ret <8 x i64> %ret
-}
-
-define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_8x16mem_to_8x64mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_8x16mem_to_8x64mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <8 x i16>,<8 x i16> *%i,align 1
- %x = sext <8 x i16> %a to <8 x i64>
- %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
- ret <8 x i64> %ret
-}
-
-define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
-; GENERIC-LABEL: sext_8x16mem_to_8x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_8x16mem_to_8x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <8 x i16>,<8 x i16> *%i,align 1
- %x = sext <8 x i16> %a to <8 x i64>
- ret <8 x i64> %x
-}
-
-define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_8x16_to_8x64mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_8x16_to_8x64mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = zext <8 x i16> %a to <8 x i64>
- %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
- ret <8 x i64> %ret
-}
-
-define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
-; GENERIC-LABEL: zext_8x16_to_8x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_8x16_to_8x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %ret = zext <8 x i16> %a to <8 x i64>
- ret <8 x i64> %ret
-}
-
-define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_2x32mem_to_2x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_2x32mem_to_2x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <2 x i32>,<2 x i32> *%i,align 1
- %x = zext <2 x i32> %a to <2 x i64>
- %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
- ret <2 x i64> %ret
-}
-
-define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_2x32mem_to_2x64mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_2x32mem_to_2x64mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovq2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <2 x i32>,<2 x i32> *%i,align 1
- %x = sext <2 x i32> %a to <2 x i64>
- %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
- ret <2 x i64> %ret
-}
-
-define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
-; GENERIC-LABEL: sext_2x32mem_to_2x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_2x32mem_to_2x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <2 x i32>,<2 x i32> *%i,align 1
- %x = sext <2 x i32> %a to <2 x i64>
- ret <2 x i64> %x
-}
-
-define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_4x32mem_to_4x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_4x32mem_to_4x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <4 x i32>,<4 x i32> *%i,align 1
- %x = zext <4 x i32> %a to <4 x i64>
- %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
- ret <4 x i64> %ret
-}
-
-define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_4x32mem_to_4x64mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_4x32mem_to_4x64mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovd2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <4 x i32>,<4 x i32> *%i,align 1
- %x = sext <4 x i32> %a to <4 x i64>
- %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
- ret <4 x i64> %ret
-}
-
-define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
-; GENERIC-LABEL: sext_4x32mem_to_4x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_4x32mem_to_4x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxdq (%rdi), %ymm0 # sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <4 x i32>,<4 x i32> *%i,align 1
- %x = sext <4 x i32> %a to <4 x i64>
- ret <4 x i64> %x
-}
-
-define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
-; GENERIC-LABEL: sext_4x32_to_4x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_4x32_to_4x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxdq %xmm0, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = sext <4 x i32> %a to <4 x i64>
- ret <4 x i64> %x
-}
-
-define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_4x32_to_4x64mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_4x32_to_4x64mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpmovd2m %xmm1, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = zext <4 x i32> %a to <4 x i64>
- %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
- ret <4 x i64> %ret
-}
-
-define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_8x32mem_to_8x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_8x32mem_to_8x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <8 x i32>,<8 x i32> *%i,align 1
- %x = zext <8 x i32> %a to <8 x i64>
- %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
- ret <8 x i64> %ret
-}
-
-define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: sext_8x32mem_to_8x64mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_8x32mem_to_8x64mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <8 x i32>,<8 x i32> *%i,align 1
- %x = sext <8 x i32> %a to <8 x i64>
- %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
- ret <8 x i64> %ret
-}
-
-define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
-; GENERIC-LABEL: sext_8x32mem_to_8x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_8x32mem_to_8x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load <8 x i32>,<8 x i32> *%i,align 1
- %x = sext <8 x i32> %a to <8 x i64>
- ret <8 x i64> %x
-}
-
-define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
-; GENERIC-LABEL: sext_8x32_to_8x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxdq %ymm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_8x32_to_8x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxdq %ymm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = sext <8 x i32> %a to <8 x i64>
- ret <8 x i64> %x
-}
-
-define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: zext_8x32_to_8x64mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_8x32_to_8x64mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm1, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = zext <8 x i32> %a to <8 x i64>
- %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
- ret <8 x i64> %ret
-}
-define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
-; GENERIC-LABEL: fptrunc_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: fptrunc_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtpd2ps %zmm0, %ymm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = fptrunc <8 x double> %a to <8 x float>
- ret <8 x float> %b
-}
-
-define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
-; GENERIC-LABEL: fpext_test:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: fpext_test:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtps2pd %ymm0, %zmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = fpext <8 x float> %a to <8 x double>
- ret <8 x double> %b
-}
-
-define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
-; GENERIC-LABEL: zext_16i1_to_16xi32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_16i1_to_16xi32:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = bitcast i16 %b to <16 x i1>
- %c = zext <16 x i1> %a to <16 x i32>
- ret <16 x i32> %c
-}
-
-define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
-; GENERIC-LABEL: zext_8i1_to_8xi64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_8i1_to_8xi64:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = bitcast i8 %b to <8 x i1>
- %c = zext <8 x i1> %a to <8 x i64>
- ret <8 x i64> %c
-}
-
-define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
-; GENERIC-LABEL: trunc_16i8_to_16i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: trunc_16i8_to_16i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask_b = trunc <16 x i8>%a to <16 x i1>
- %mask = bitcast <16 x i1> %mask_b to i16
- ret i16 %mask
-}
-
-define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
-; GENERIC-LABEL: trunc_16i32_to_16i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: trunc_16i32_to_16i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:1.00]
-; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask_b = trunc <16 x i32>%a to <16 x i1>
- %mask = bitcast <16 x i1> %mask_b to i16
- ret i16 %mask
-}
-
-define <4 x i32> @trunc_4i32_to_4i1(<4 x i32> %a, <4 x i32> %b) {
-; GENERIC-LABEL: trunc_4i32_to_4i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpsrad $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: trunc_4i32_to_4i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpsrad $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask_a = trunc <4 x i32>%a to <4 x i1>
- %mask_b = trunc <4 x i32>%b to <4 x i1>
- %a_and_b = and <4 x i1>%mask_a, %mask_b
- %res = sext <4 x i1>%a_and_b to <4 x i32>
- ret <4 x i32>%res
-}
-
-
-define i8 @trunc_8i16_to_8i1(<8 x i16> %a) {
-; GENERIC-LABEL: trunc_8i16_to_8i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $al killed $al killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: trunc_8i16_to_8i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: # kill: def $al killed $al killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask_b = trunc <8 x i16>%a to <8 x i1>
- %mask = bitcast <8 x i1> %mask_b to i8
- ret i8 %mask
-}
-
-define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
-; GENERIC-LABEL: sext_8i1_8i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_8i1_8i32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = icmp slt <8 x i32> %a1, %a2
- %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
- %y = sext <8 x i1> %x1 to <8 x i32>
- ret <8 x i32> %y
-}
-
-
-define i16 @trunc_i32_to_i1(i32 %a) {
-; GENERIC-LABEL: trunc_i32_to_i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movw $-4, %ax # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kshiftrw $1, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kshiftlw $1, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33]
-; GENERIC-NEXT: kmovw %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: korw %k1, %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: trunc_i32_to_i1:
-; SKX: # %bb.0:
-; SKX-NEXT: movw $-4, %ax # sched: [1:0.25]
-; SKX-NEXT: kmovd %eax, %k0 # sched: [1:1.00]
-; SKX-NEXT: kshiftrw $1, %k0, %k0 # sched: [3:1.00]
-; SKX-NEXT: kshiftlw $1, %k0, %k0 # sched: [3:1.00]
-; SKX-NEXT: andl $1, %edi # sched: [1:0.25]
-; SKX-NEXT: kmovw %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
- %a_i = trunc i32 %a to i1
- %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
- %res = bitcast <16 x i1> %maskv to i16
- ret i16 %res
-}
-
-define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
-; GENERIC-LABEL: sext_8i1_8i16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50]
-; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_8i1_8i16:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = icmp slt <8 x i32> %a1, %a2
- %y = sext <8 x i1> %x to <8 x i16>
- ret <8 x i16> %y
-}
-
-define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
-; GENERIC-LABEL: sext_16i1_16i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [1:0.50]
-; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_16i1_16i32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtd %zmm0, %zmm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = icmp slt <16 x i32> %a1, %a2
- %y = sext <16 x i1> %x to <16 x i32>
- ret <16 x i32> %y
-}
-
-define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
-; GENERIC-LABEL: sext_8i1_8i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [1:0.50]
-; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: sext_8i1_8i64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtd %ymm0, %ymm1, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = icmp slt <8 x i32> %a1, %a2
- %y = sext <8 x i1> %x to <8 x i64>
- ret <8 x i64> %y
-}
-
-define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
-; GENERIC-LABEL: extload_v8i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: extload_v8i64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [10:1.00]
-; SKX-NEXT: vmovdqa64 %zmm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %sign_load = load <8 x i8>, <8 x i8>* %a
- %c = sext <8 x i8> %sign_load to <8 x i64>
- store <8 x i64> %c, <8 x i64>* %res
- ret void
-}
-
-define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: test21:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
-; GENERIC-NEXT: kshiftrq $32, %k1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test21:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00]
-; SKX-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:1.00]
-; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
-; SKX-NEXT: kshiftrq $32, %k1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovdqu16 %zmm1, %zmm1 {%k1} {z} # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %ret = select <64 x i1> %mask, <64 x i16> %x, <64 x i16> zeroinitializer
- ret <64 x i16> %ret
-}
-
-define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone {
-; GENERIC-LABEL: shuffle_zext_16x8_to_16x16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: shuffle_zext_16x8_to_16x16:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
- %2 = bitcast <32 x i8> %1 to <16 x i16>
- ret <16 x i16> %2
-}
-
-define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: shuffle_zext_16x8_to_16x16_mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm1, %k1 # sched: [1:1.00]
-; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
- %bc = bitcast <32 x i8> %x to <16 x i16>
- %ret = select <16 x i1> %mask, <16 x i16> %bc, <16 x i16> zeroinitializer
- ret <16 x i16> %ret
-}
-
-define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) {
-; GENERIC-LABEL: zext_32x8_to_16x16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_32x8_to_16x16:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32>
- %2 = bitcast <32 x i8> %1 to <16 x i16>
- ret <16 x i16> %2
-}
-
-define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) {
-; GENERIC-LABEL: zext_32x8_to_8x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_32x8_to_8x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
- %2 = bitcast <32 x i8> %1 to <8 x i32>
- ret <8 x i32> %2
-}
-
-define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) {
-; GENERIC-LABEL: zext_32x8_to_4x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_32x8_to_4x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
- %2 = bitcast <32 x i8> %1 to <4 x i64>
- ret <4 x i64> %2
-}
-
-define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) {
-; GENERIC-LABEL: zext_16x16_to_8x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_16x16_to_8x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16>
- %2 = bitcast <16 x i16> %1 to <8 x i32>
- ret <8 x i32> %2
-}
-
-define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) {
-; GENERIC-LABEL: zext_16x16_to_4x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_16x16_to_4x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
- %2 = bitcast <16 x i16> %1 to <4 x i64>
- ret <4 x i64> %2
-}
-
-define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
-; GENERIC-LABEL: zext_8x32_to_4x64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_8x32_to_4x64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
- %2 = bitcast <8 x i32> %1 to <4 x i64>
- ret <4 x i64> %2
-}
-
-define <64 x i8> @zext_64xi1_to_64xi8(<64 x i8> %x, <64 x i8> %y) #0 {
-; GENERIC-LABEL: zext_64xi1_to_64xi8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [1:0.50]
-; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_64xi1_to_64xi8:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqb %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp eq <64 x i8> %x, %y
- %1 = zext <64 x i1> %mask to <64 x i8>
- ret <64 x i8> %1
-}
-
-define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 {
-; GENERIC-LABEL: zext_32xi1_to_32xi16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [1:0.50]
-; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_32xi1_to_32xi16:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp eq <32 x i16> %x, %y
- %1 = zext <32 x i1> %mask to <32 x i16>
- ret <32 x i16> %1
-}
-
-define <16 x i16> @zext_16xi1_to_16xi16(<16 x i16> %x, <16 x i16> %y) #0 {
-; GENERIC-LABEL: zext_16xi1_to_16xi16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_16xi1_to_16xi16:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpsrlw $15, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp eq <16 x i16> %x, %y
- %1 = zext <16 x i1> %mask to <16 x i16>
- ret <16 x i16> %1
-}
-
-
-define <32 x i8> @zext_32xi1_to_32xi8(<32 x i16> %x, <32 x i16> %y) #0 {
-; GENERIC-LABEL: zext_32xi1_to_32xi8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [1:0.50]
-; GENERIC-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_32xi1_to_32xi8:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp eq <32 x i16> %x, %y
- %1 = zext <32 x i1> %mask to <32 x i8>
- ret <32 x i8> %1
-}
-
-define <4 x i32> @zext_4xi1_to_4x32(<4 x i8> %x, <4 x i8> %y) #0 {
-; GENERIC-LABEL: zext_4xi1_to_4x32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,255] sched: [7:0.50]
-; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_4xi1_to_4x32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,255] sched: [6:0.50]
-; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpsrld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp eq <4 x i8> %x, %y
- %1 = zext <4 x i1> %mask to <4 x i32>
- ret <4 x i32> %1
-}
-
-define <2 x i64> @zext_2xi1_to_2xi64(<2 x i8> %x, <2 x i8> %y) #0 {
-; GENERIC-LABEL: zext_2xi1_to_2xi64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastq {{.*#+}} xmm2 = [255,255] sched: [7:0.50]
-; GENERIC-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_2xi1_to_2xi64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastq {{.*#+}} xmm2 = [255,255] sched: [6:0.50]
-; SKX-NEXT: vpand %xmm2, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpand %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpsrlq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp eq <2 x i8> %x, %y
- %1 = zext <2 x i1> %mask to <2 x i64>
- ret <2 x i64> %1
-}
-
-define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
-; GENERIC-LABEL: test_x86_fmadd_ps_z:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_x86_fmadd_ps_z:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = fmul <16 x float> %a0, %a1
- %res = fadd <16 x float> %x, %a2
- ret <16 x float> %res
-}
-
-define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
-; GENERIC-LABEL: test_x86_fmsub_ps_z:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_x86_fmsub_ps_z:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = fmul <16 x float> %a0, %a1
- %res = fsub <16 x float> %x, %a2
- ret <16 x float> %res
-}
-
-define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
-; GENERIC-LABEL: test_x86_fnmadd_ps_z:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_x86_fnmadd_ps_z:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = fmul <16 x float> %a0, %a1
- %res = fsub <16 x float> %a2, %x
- ret <16 x float> %res
-}
-
-define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
-; GENERIC-LABEL: test_x86_fnmsub_ps_z:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_x86_fnmsub_ps_z:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = fmul <16 x float> %a0, %a1
- %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
- float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
- float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
- float -0.000000e+00>, %x
- %res = fsub <16 x float> %y, %a2
- ret <16 x float> %res
-}
-
-define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
-; GENERIC-LABEL: test_x86_fmadd_pd_z:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_x86_fmadd_pd_z:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = fmul <8 x double> %a0, %a1
- %res = fadd <8 x double> %x, %a2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
-; GENERIC-LABEL: test_x86_fmsub_pd_z:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_x86_fmsub_pd_z:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = fmul <8 x double> %a0, %a1
- %res = fsub <8 x double> %x, %a2
- ret <8 x double> %res
-}
-
-define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
-; GENERIC-LABEL: test_x86_fmsub_213:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_x86_fmsub_213:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = fmul double %a0, %a1
- %res = fsub double %x, %a2
- ret double %res
-}
-
-define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
-; GENERIC-LABEL: test_x86_fmsub_213_m:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_x86_fmsub_213_m:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a2 = load double , double *%a2_ptr
- %x = fmul double %a0, %a1
- %res = fsub double %x, %a2
- ret double %res
-}
-
-define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
-; GENERIC-LABEL: test_x86_fmsub_231_m:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_x86_fmsub_231_m:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a2 = load double , double *%a2_ptr
- %x = fmul double %a0, %a2
- %res = fsub double %x, %a1
- ret double %res
-}
-
-define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
-; GENERIC-LABEL: test231_br:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [12:1.00]
-; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test231_br:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
- %b2 = fadd <16 x float> %b1, %a2
- ret <16 x float> %b2
-}
-
-define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
-; GENERIC-LABEL: test213_br:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [10:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test213_br:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b1 = fmul <16 x float> %a1, %a2
- %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
- ret <16 x float> %b2
-}
-
-;mask (a*c+b , a)
-define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
-; GENERIC-LABEL: test_x86_fmadd132_ps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [12:1.00]
-; GENERIC-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_x86_fmadd132_ps:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00]
-; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [11:0.50]
-; SKX-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
- %x = fmul <16 x float> %a0, %a2
- %y = fadd <16 x float> %x, %a1
- %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
- ret <16 x float> %res
-}
-
-;mask (a*c+b , b)
-define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
-; GENERIC-LABEL: test_x86_fmadd231_ps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [12:1.00]
-; GENERIC-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_x86_fmadd231_ps:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00]
-; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [4:0.50]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
- %x = fmul <16 x float> %a0, %a2
- %y = fadd <16 x float> %x, %a1
- %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
- ret <16 x float> %res
-}
-
-;mask (b*a+c , b)
-define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
-; GENERIC-LABEL: test_x86_fmadd213_ps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [10:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_x86_fmadd213_ps:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00]
-; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [11:0.50]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1
- %x = fmul <16 x float> %a1, %a0
- %y = fadd <16 x float> %x, %a2
- %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
- ret <16 x float> %res
-}
-
-define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
-; GENERIC-LABEL: vpandd:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpandd %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpandd:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpandd %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- ; Force the execution domain with an add.
- %a2 = add <16 x i32> %a, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2,
- i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
- %x = and <16 x i32> %a2, %b
- ret <16 x i32> %x
-}
-
-define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
-; GENERIC-LABEL: vpandnd:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpandnd %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpandnd:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpandnd %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- ; Force the execution domain with an add.
- %a2 = add <16 x i32> %a, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3,
- i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
- %b2 = xor <16 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1,
- i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
- %x = and <16 x i32> %a2, %b2
- ret <16 x i32> %x
-}
-
-define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
-; GENERIC-LABEL: vpord:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpord %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpord:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpord %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- ; Force the execution domain with an add.
- %a2 = add <16 x i32> %a, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4,
- i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
- %x = or <16 x i32> %a2, %b
- ret <16 x i32> %x
-}
-
-define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ssp {
-; GENERIC-LABEL: vpxord:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpxord %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpxord:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpxord %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- ; Force the execution domain with an add.
- %a2 = add <16 x i32> %a, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5,
- i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
- %x = xor <16 x i32> %a2, %b
- ret <16 x i32> %x
-}
-
-define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
-; GENERIC-LABEL: vpandq:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpandq:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- ; Force the execution domain with an add.
- %a2 = add <8 x i64> %a, <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>
- %x = and <8 x i64> %a2, %b
- ret <8 x i64> %x
-}
-
-define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
-; GENERIC-LABEL: vpandnq:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpandnq:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- ; Force the execution domain with an add.
- %a2 = add <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
- %b2 = xor <8 x i64> %b, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
- %x = and <8 x i64> %a2, %b2
- ret <8 x i64> %x
-}
-
-define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
-; GENERIC-LABEL: vporq:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vporq:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- ; Force the execution domain with an add.
- %a2 = add <8 x i64> %a, <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
- %x = or <8 x i64> %a2, %b
- ret <8 x i64> %x
-}
-
-define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ssp {
-; GENERIC-LABEL: vpxorq:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpxorq:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- ; Force the execution domain with an add.
- %a2 = add <8 x i64> %a, <i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9, i64 9>
- %x = xor <8 x i64> %a2, %b
- ret <8 x i64> %x
-}
-
-define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) {
-; GENERIC-LABEL: and_v64i8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: and_v64i8:
-; SKX: # %bb.0:
-; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = and <64 x i8> %a, %b
- ret <64 x i8> %res
-}
-
-define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) {
-; GENERIC-LABEL: andn_v64i8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: andn_v64i8:
-; SKX: # %bb.0:
-; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b2 = xor <64 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
- i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
- %res = and <64 x i8> %a, %b2
- ret <64 x i8> %res
-}
-
-define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) {
-; GENERIC-LABEL: or_v64i8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: or_v64i8:
-; SKX: # %bb.0:
-; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = or <64 x i8> %a, %b
- ret <64 x i8> %res
-}
-
-define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) {
-; GENERIC-LABEL: xor_v64i8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: xor_v64i8:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = xor <64 x i8> %a, %b
- ret <64 x i8> %res
-}
-
-define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) {
-; GENERIC-LABEL: and_v32i16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: and_v32i16:
-; SKX: # %bb.0:
-; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = and <32 x i16> %a, %b
- ret <32 x i16> %res
-}
-
-define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) {
-; GENERIC-LABEL: andn_v32i16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: andn_v32i16:
-; SKX: # %bb.0:
-; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b2 = xor <32 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1,
- i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
- %res = and <32 x i16> %a, %b2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) {
-; GENERIC-LABEL: or_v32i16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: or_v32i16:
-; SKX: # %bb.0:
-; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = or <32 x i16> %a, %b
- ret <32 x i16> %res
-}
-
-define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) {
-; GENERIC-LABEL: xor_v32i16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: xor_v32i16:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = xor <32 x i16> %a, %b
- ret <32 x i16> %res
-}
-
-define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
-; GENERIC-LABEL: masked_and_v16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: masked_and_v16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
-; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a1 = bitcast <16 x float> %a to <16 x i32>
- %b1 = bitcast <16 x float> %b to <16 x i32>
- %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
- %mask1 = bitcast i16 %mask to <16 x i1>
- %op = and <16 x i32> %a1, %b1
- %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1
- %cast = bitcast <16 x i32> %select to <16 x float>
- %add = fadd <16 x float> %c, %cast
- ret <16 x float> %add
-}
-
-define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
-; GENERIC-LABEL: masked_or_v16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: masked_or_v16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
-; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a1 = bitcast <16 x float> %a to <16 x i32>
- %b1 = bitcast <16 x float> %b to <16 x i32>
- %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
- %mask1 = bitcast i16 %mask to <16 x i1>
- %op = and <16 x i32> %a1, %b1
- %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1
- %cast = bitcast <16 x i32> %select to <16 x float>
- %add = fadd <16 x float> %c, %cast
- ret <16 x float> %add
-}
-
-define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %passThru, i16 %mask, <16 x float> %c) {
-; GENERIC-LABEL: masked_xor_v16f32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: masked_xor_v16f32:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
-; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a1 = bitcast <16 x float> %a to <16 x i32>
- %b1 = bitcast <16 x float> %b to <16 x i32>
- %passThru1 = bitcast <16 x float> %passThru to <16 x i32>
- %mask1 = bitcast i16 %mask to <16 x i1>
- %op = and <16 x i32> %a1, %b1
- %select = select <16 x i1> %mask1, <16 x i32> %op, <16 x i32> %passThru1
- %cast = bitcast <16 x i32> %select to <16 x float>
- %add = fadd <16 x float> %c, %cast
- ret <16 x float> %add
-}
-
-define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
-; GENERIC-LABEL: masked_and_v8f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: masked_and_v8f64:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
-; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a1 = bitcast <8 x double> %a to <8 x i64>
- %b1 = bitcast <8 x double> %b to <8 x i64>
- %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
- %mask1 = bitcast i8 %mask to <8 x i1>
- %op = and <8 x i64> %a1, %b1
- %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1
- %cast = bitcast <8 x i64> %select to <8 x double>
- %add = fadd <8 x double> %c, %cast
- ret <8 x double> %add
-}
-
-define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
-; GENERIC-LABEL: masked_or_v8f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: masked_or_v8f64:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
-; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a1 = bitcast <8 x double> %a to <8 x i64>
- %b1 = bitcast <8 x double> %b to <8 x i64>
- %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
- %mask1 = bitcast i8 %mask to <8 x i1>
- %op = and <8 x i64> %a1, %b1
- %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1
- %cast = bitcast <8 x i64> %select to <8 x double>
- %add = fadd <8 x double> %c, %cast
- ret <8 x double> %add
-}
-
-define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %passThru, i8 %mask, <8 x double> %c) {
-; GENERIC-LABEL: masked_xor_v8f64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: masked_xor_v8f64:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50]
-; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a1 = bitcast <8 x double> %a to <8 x i64>
- %b1 = bitcast <8 x double> %b to <8 x i64>
- %passThru1 = bitcast <8 x double> %passThru to <8 x i64>
- %mask1 = bitcast i8 %mask to <8 x i1>
- %op = and <8 x i64> %a1, %b1
- %select = select <8 x i1> %mask1, <8 x i64> %op, <8 x i64> %passThru1
- %cast = bitcast <8 x i64> %select to <8 x double>
- %add = fadd <8 x double> %c, %cast
- ret <8 x double> %add
-}
-
-define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
-; GENERIC-LABEL: test_mm512_mask_and_epi32:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_mask_and_epi32:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %and1.i.i = and <8 x i64> %__a, %__b
- %0 = bitcast <8 x i64> %and1.i.i to <16 x i32>
- %1 = bitcast <8 x i64> %__src to <16 x i32>
- %2 = bitcast i16 %__k to <16 x i1>
- %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
- %4 = bitcast <16 x i32> %3 to <8 x i64>
- ret <8 x i64> %4
-}
-
-define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
-; GENERIC-LABEL: test_mm512_mask_or_epi32:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_mask_or_epi32:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %or1.i.i = or <8 x i64> %__a, %__b
- %0 = bitcast <8 x i64> %or1.i.i to <16 x i32>
- %1 = bitcast <8 x i64> %__src to <16 x i32>
- %2 = bitcast i16 %__k to <16 x i1>
- %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
- %4 = bitcast <16 x i32> %3 to <8 x i64>
- ret <8 x i64> %4
-}
-
-define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, <8 x i64> %__a, <8 x i64> %__b) {
-; GENERIC-LABEL: test_mm512_mask_xor_epi32:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_mask_xor_epi32:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %xor1.i.i = xor <8 x i64> %__a, %__b
- %0 = bitcast <8 x i64> %xor1.i.i to <16 x i32>
- %1 = bitcast <8 x i64> %__src to <16 x i32>
- %2 = bitcast i16 %__k to <16 x i1>
- %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
- %4 = bitcast <16 x i32> %3 to <8 x i64>
- ret <8 x i64> %4
-}
-
-define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
-; GENERIC-LABEL: test_mm512_mask_xor_pd:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_mask_xor_pd:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = bitcast <8 x double> %__A to <8 x i64>
- %1 = bitcast <8 x double> %__B to <8 x i64>
- %xor.i.i = xor <8 x i64> %0, %1
- %2 = bitcast <8 x i64> %xor.i.i to <8 x double>
- %3 = bitcast i8 %__U to <8 x i1>
- %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
- ret <8 x double> %4
-}
-
-define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
-; GENERIC-LABEL: test_mm512_maskz_xor_pd:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_maskz_xor_pd:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = bitcast <8 x double> %__A to <8 x i64>
- %1 = bitcast <8 x double> %__B to <8 x i64>
- %xor.i.i = xor <8 x i64> %0, %1
- %2 = bitcast <8 x i64> %xor.i.i to <8 x double>
- %3 = bitcast i8 %__U to <8 x i1>
- %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
- ret <8 x double> %4
-}
-
-define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
-; GENERIC-LABEL: test_mm512_mask_xor_ps:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_mask_xor_ps:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = bitcast <16 x float> %__A to <16 x i32>
- %1 = bitcast <16 x float> %__B to <16 x i32>
- %xor.i.i = xor <16 x i32> %0, %1
- %2 = bitcast <16 x i32> %xor.i.i to <16 x float>
- %3 = bitcast i16 %__U to <16 x i1>
- %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
- ret <16 x float> %4
-}
-
-define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
-; GENERIC-LABEL: test_mm512_maskz_xor_ps:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_maskz_xor_ps:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = bitcast <16 x float> %__A to <16 x i32>
- %1 = bitcast <16 x float> %__B to <16 x i32>
- %xor.i.i = xor <16 x i32> %0, %1
- %2 = bitcast <16 x i32> %xor.i.i to <16 x float>
- %3 = bitcast i16 %__U to <16 x i1>
- %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
- ret <16 x float> %4
-}
-
-define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
-; GENERIC-LABEL: test_mm512_mask_or_pd:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_mask_or_pd:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = bitcast <8 x double> %__A to <8 x i64>
- %1 = bitcast <8 x double> %__B to <8 x i64>
- %or.i.i = or <8 x i64> %1, %0
- %2 = bitcast <8 x i64> %or.i.i to <8 x double>
- %3 = bitcast i8 %__U to <8 x i1>
- %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
- ret <8 x double> %4
-}
-
-define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
-; GENERIC-LABEL: test_mm512_maskz_or_pd:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_maskz_or_pd:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = bitcast <8 x double> %__A to <8 x i64>
- %1 = bitcast <8 x double> %__B to <8 x i64>
- %or.i.i = or <8 x i64> %1, %0
- %2 = bitcast <8 x i64> %or.i.i to <8 x double>
- %3 = bitcast i8 %__U to <8 x i1>
- %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
- ret <8 x double> %4
-}
-
-define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
-; GENERIC-LABEL: test_mm512_mask_or_ps:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_mask_or_ps:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = bitcast <16 x float> %__A to <16 x i32>
- %1 = bitcast <16 x float> %__B to <16 x i32>
- %or.i.i = or <16 x i32> %1, %0
- %2 = bitcast <16 x i32> %or.i.i to <16 x float>
- %3 = bitcast i16 %__U to <16 x i1>
- %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
- ret <16 x float> %4
-}
-
-define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
-; GENERIC-LABEL: test_mm512_maskz_or_ps:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_maskz_or_ps:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = bitcast <16 x float> %__A to <16 x i32>
- %1 = bitcast <16 x float> %__B to <16 x i32>
- %or.i.i = or <16 x i32> %1, %0
- %2 = bitcast <16 x i32> %or.i.i to <16 x float>
- %3 = bitcast i16 %__U to <16 x i1>
- %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
- ret <16 x float> %4
-}
-
-define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
-; GENERIC-LABEL: test_mm512_mask_and_pd:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_mask_and_pd:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = bitcast <8 x double> %__A to <8 x i64>
- %1 = bitcast <8 x double> %__B to <8 x i64>
- %and.i.i = and <8 x i64> %1, %0
- %2 = bitcast <8 x i64> %and.i.i to <8 x double>
- %3 = bitcast i8 %__U to <8 x i1>
- %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
- ret <8 x double> %4
-}
-
-define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
-; GENERIC-LABEL: test_mm512_maskz_and_pd:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_maskz_and_pd:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = bitcast <8 x double> %__A to <8 x i64>
- %1 = bitcast <8 x double> %__B to <8 x i64>
- %and.i.i = and <8 x i64> %1, %0
- %2 = bitcast <8 x i64> %and.i.i to <8 x double>
- %3 = bitcast i8 %__U to <8 x i1>
- %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
- ret <8 x double> %4
-}
-
-define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
-; GENERIC-LABEL: test_mm512_mask_and_ps:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_mask_and_ps:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = bitcast <16 x float> %__A to <16 x i32>
- %1 = bitcast <16 x float> %__B to <16 x i32>
- %and.i.i = and <16 x i32> %1, %0
- %2 = bitcast <16 x i32> %and.i.i to <16 x float>
- %3 = bitcast i16 %__U to <16 x i1>
- %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
- ret <16 x float> %4
-}
-
-define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
-; GENERIC-LABEL: test_mm512_maskz_and_ps:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_maskz_and_ps:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = bitcast <16 x float> %__A to <16 x i32>
- %1 = bitcast <16 x float> %__B to <16 x i32>
- %and.i.i = and <16 x i32> %1, %0
- %2 = bitcast <16 x i32> %and.i.i to <16 x float>
- %3 = bitcast i16 %__U to <16 x i1>
- %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
- ret <16 x float> %4
-}
-
-define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
-; GENERIC-LABEL: test_mm512_mask_andnot_pd:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_mask_andnot_pd:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = bitcast <8 x double> %__A to <8 x i64>
- %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
- %1 = bitcast <8 x double> %__B to <8 x i64>
- %and.i.i = and <8 x i64> %1, %neg.i.i
- %2 = bitcast <8 x i64> %and.i.i to <8 x double>
- %3 = bitcast i8 %__U to <8 x i1>
- %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
- ret <8 x double> %4
-}
-
-define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
-; GENERIC-LABEL: test_mm512_maskz_andnot_pd:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_maskz_andnot_pd:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = bitcast <8 x double> %__A to <8 x i64>
- %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
- %1 = bitcast <8 x double> %__B to <8 x i64>
- %and.i.i = and <8 x i64> %1, %neg.i.i
- %2 = bitcast <8 x i64> %and.i.i to <8 x double>
- %3 = bitcast i8 %__U to <8 x i1>
- %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
- ret <8 x double> %4
-}
-
-define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
-; GENERIC-LABEL: test_mm512_mask_andnot_ps:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_mask_andnot_ps:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = bitcast <16 x float> %__A to <16 x i32>
- %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
- %1 = bitcast <16 x float> %__B to <16 x i32>
- %and.i.i = and <16 x i32> %1, %neg.i.i
- %2 = bitcast <16 x i32> %and.i.i to <16 x float>
- %3 = bitcast i16 %__U to <16 x i1>
- %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
- ret <16 x float> %4
-}
-
-define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
-; GENERIC-LABEL: test_mm512_maskz_andnot_ps:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: kmovd %edi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_maskz_andnot_ps:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00]
-; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = bitcast <16 x float> %__A to <16 x i32>
- %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
- %1 = bitcast <16 x float> %__B to <16 x i32>
- %and.i.i = and <16 x i32> %1, %neg.i.i
- %2 = bitcast <16 x i32> %and.i.i to <16 x float>
- %3 = bitcast i16 %__U to <16 x i1>
- %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
- ret <16 x float> %4
-}
-
-define i32 @mov_test1(float %x) {
-; GENERIC-LABEL: mov_test1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = bitcast float %x to i32
- ret i32 %res
-}
-
-define <4 x i32> @mov_test2(i32 %x) {
-; GENERIC-LABEL: mov_test2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = insertelement <4 x i32>undef, i32 %x, i32 0
- ret <4 x i32>%res
-}
-
-define <2 x i64> @mov_test3(i64 %x) {
-; GENERIC-LABEL: mov_test3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = insertelement <2 x i64>undef, i64 %x, i32 0
- ret <2 x i64>%res
-}
-
-define <4 x i32> @mov_test4(i32* %x) {
-; GENERIC-LABEL: mov_test4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test4:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %y = load i32, i32* %x
- %res = insertelement <4 x i32>undef, i32 %y, i32 0
- ret <4 x i32>%res
-}
-
-define void @mov_test5(float %x, float* %y) {
-; GENERIC-LABEL: mov_test5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovss %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test5:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovss %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- store float %x, float* %y, align 4
- ret void
-}
-
-define void @mov_test6(double %x, double* %y) {
-; GENERIC-LABEL: mov_test6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovsd %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test6:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovsd %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- store double %x, double* %y, align 8
- ret void
-}
-
-define float @mov_test7(i32* %x) {
-; GENERIC-LABEL: mov_test7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test7:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %y = load i32, i32* %x
- %res = bitcast i32 %y to float
- ret float %res
-}
-
-define i32 @mov_test8(<4 x i32> %x) {
-; GENERIC-LABEL: mov_test8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test8:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = extractelement <4 x i32> %x, i32 0
- ret i32 %res
-}
-
-define i64 @mov_test9(<2 x i64> %x) {
-; GENERIC-LABEL: mov_test9:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovq %xmm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test9:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = extractelement <2 x i64> %x, i32 0
- ret i64 %res
-}
-
-define <4 x i32> @mov_test10(i32* %x) {
-; GENERIC-LABEL: mov_test10:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test10:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %y = load i32, i32* %x, align 4
- %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
- ret <4 x i32>%res
-}
-
-define <4 x float> @mov_test11(float* %x) {
-; GENERIC-LABEL: mov_test11:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test11:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %y = load float, float* %x, align 4
- %res = insertelement <4 x float>zeroinitializer, float %y, i32 0
- ret <4 x float>%res
-}
-
-define <2 x double> @mov_test12(double* %x) {
-; GENERIC-LABEL: mov_test12:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test12:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %y = load double, double* %x, align 8
- %res = insertelement <2 x double>zeroinitializer, double %y, i32 0
- ret <2 x double>%res
-}
-
-define <2 x i64> @mov_test13(i64 %x) {
-; GENERIC-LABEL: mov_test13:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test13:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = insertelement <2 x i64>zeroinitializer, i64 %x, i32 0
- ret <2 x i64>%res
-}
-
-define <4 x i32> @mov_test14(i32 %x) {
-; GENERIC-LABEL: mov_test14:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test14:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = insertelement <4 x i32>zeroinitializer, i32 %x, i32 0
- ret <4 x i32>%res
-}
-
-define <4 x i32> @mov_test15(i32* %x) {
-; GENERIC-LABEL: mov_test15:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test15:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %y = load i32, i32* %x, align 4
- %res = insertelement <4 x i32>zeroinitializer, i32 %y, i32 0
- ret <4 x i32>%res
-}
-
-define <16 x i32> @mov_test16(i8 * %addr) {
-; GENERIC-LABEL: mov_test16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test16:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vaddr = bitcast i8* %addr to <16 x i32>*
- %res = load <16 x i32>, <16 x i32>* %vaddr, align 1
- ret <16 x i32>%res
-}
-
-define <16 x i32> @mov_test17(i8 * %addr) {
-; GENERIC-LABEL: mov_test17:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test17:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vaddr = bitcast i8* %addr to <16 x i32>*
- %res = load <16 x i32>, <16 x i32>* %vaddr, align 64
- ret <16 x i32>%res
-}
-
-define void @mov_test18(i8 * %addr, <8 x i64> %data) {
-; GENERIC-LABEL: mov_test18:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test18:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vaddr = bitcast i8* %addr to <8 x i64>*
- store <8 x i64>%data, <8 x i64>* %vaddr, align 64
- ret void
-}
-
-define void @mov_test19(i8 * %addr, <16 x i32> %data) {
-; GENERIC-LABEL: mov_test19:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test19:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vaddr = bitcast i8* %addr to <16 x i32>*
- store <16 x i32>%data, <16 x i32>* %vaddr, align 1
- ret void
-}
-
-define void @mov_test20(i8 * %addr, <16 x i32> %data) {
-; GENERIC-LABEL: mov_test20:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test20:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vaddr = bitcast i8* %addr to <16 x i32>*
- store <16 x i32>%data, <16 x i32>* %vaddr, align 64
- ret void
-}
-
-define <8 x i64> @mov_test21(i8 * %addr) {
-; GENERIC-LABEL: mov_test21:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test21:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vaddr = bitcast i8* %addr to <8 x i64>*
- %res = load <8 x i64>, <8 x i64>* %vaddr, align 64
- ret <8 x i64>%res
-}
-
-define void @mov_test22(i8 * %addr, <8 x i64> %data) {
-; GENERIC-LABEL: mov_test22:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test22:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vaddr = bitcast i8* %addr to <8 x i64>*
- store <8 x i64>%data, <8 x i64>* %vaddr, align 1
- ret void
-}
-
-define <8 x i64> @mov_test23(i8 * %addr) {
-; GENERIC-LABEL: mov_test23:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test23:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vaddr = bitcast i8* %addr to <8 x i64>*
- %res = load <8 x i64>, <8 x i64>* %vaddr, align 1
- ret <8 x i64>%res
-}
-
-define void @mov_test24(i8 * %addr, <8 x double> %data) {
-; GENERIC-LABEL: mov_test24:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test24:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vaddr = bitcast i8* %addr to <8 x double>*
- store <8 x double>%data, <8 x double>* %vaddr, align 64
- ret void
-}
-
-define <8 x double> @mov_test25(i8 * %addr) {
-; GENERIC-LABEL: mov_test25:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test25:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vaddr = bitcast i8* %addr to <8 x double>*
- %res = load <8 x double>, <8 x double>* %vaddr, align 64
- ret <8 x double>%res
-}
-
-define void @mov_test26(i8 * %addr, <16 x float> %data) {
-; GENERIC-LABEL: mov_test26:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test26:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vaddr = bitcast i8* %addr to <16 x float>*
- store <16 x float>%data, <16 x float>* %vaddr, align 64
- ret void
-}
-
-define <16 x float> @mov_test27(i8 * %addr) {
-; GENERIC-LABEL: mov_test27:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps (%rdi), %zmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test27:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps (%rdi), %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vaddr = bitcast i8* %addr to <16 x float>*
- %res = load <16 x float>, <16 x float>* %vaddr, align 64
- ret <16 x float>%res
-}
-
-define void @mov_test28(i8 * %addr, <8 x double> %data) {
-; GENERIC-LABEL: mov_test28:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test28:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vaddr = bitcast i8* %addr to <8 x double>*
- store <8 x double>%data, <8 x double>* %vaddr, align 1
- ret void
-}
-
-define <8 x double> @mov_test29(i8 * %addr) {
-; GENERIC-LABEL: mov_test29:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test29:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vaddr = bitcast i8* %addr to <8 x double>*
- %res = load <8 x double>, <8 x double>* %vaddr, align 1
- ret <8 x double>%res
-}
-
-define void @mov_test30(i8 * %addr, <16 x float> %data) {
-; GENERIC-LABEL: mov_test30:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test30:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovups %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vaddr = bitcast i8* %addr to <16 x float>*
- store <16 x float>%data, <16 x float>* %vaddr, align 1
- ret void
-}
-
-define <16 x float> @mov_test31(i8 * %addr) {
-; GENERIC-LABEL: mov_test31:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovups (%rdi), %zmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test31:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovups (%rdi), %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vaddr = bitcast i8* %addr to <16 x float>*
- %res = load <16 x float>, <16 x float>* %vaddr, align 1
- ret <16 x float>%res
-}
-
-define <16 x i32> @mov_test32(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
-; GENERIC-LABEL: mov_test32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test32:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %vaddr = bitcast i8* %addr to <16 x i32>*
- %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
- %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
- ret <16 x i32>%res
-}
-
-define <16 x i32> @mov_test33(i8 * %addr, <16 x i32> %old, <16 x i32> %mask1) {
-; GENERIC-LABEL: mov_test33:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test33:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %vaddr = bitcast i8* %addr to <16 x i32>*
- %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
- %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> %old
- ret <16 x i32>%res
-}
-
-define <16 x i32> @mov_test34(i8 * %addr, <16 x i32> %mask1) {
-; GENERIC-LABEL: mov_test34:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test34:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovdqa32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %vaddr = bitcast i8* %addr to <16 x i32>*
- %r = load <16 x i32>, <16 x i32>* %vaddr, align 64
- %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
- ret <16 x i32>%res
-}
-
-define <16 x i32> @mov_test35(i8 * %addr, <16 x i32> %mask1) {
-; GENERIC-LABEL: mov_test35:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test35:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %vaddr = bitcast i8* %addr to <16 x i32>*
- %r = load <16 x i32>, <16 x i32>* %vaddr, align 1
- %res = select <16 x i1> %mask, <16 x i32> %r, <16 x i32> zeroinitializer
- ret <16 x i32>%res
-}
-
-define <8 x i64> @mov_test36(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
-; GENERIC-LABEL: mov_test36:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test36:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <8 x i64> %mask1, zeroinitializer
- %vaddr = bitcast i8* %addr to <8 x i64>*
- %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
- %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
- ret <8 x i64>%res
-}
-
-define <8 x i64> @mov_test37(i8 * %addr, <8 x i64> %old, <8 x i64> %mask1) {
-; GENERIC-LABEL: mov_test37:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test37:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <8 x i64> %mask1, zeroinitializer
- %vaddr = bitcast i8* %addr to <8 x i64>*
- %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
- %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> %old
- ret <8 x i64>%res
-}
-
-define <8 x i64> @mov_test38(i8 * %addr, <8 x i64> %mask1) {
-; GENERIC-LABEL: mov_test38:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test38:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <8 x i64> %mask1, zeroinitializer
- %vaddr = bitcast i8* %addr to <8 x i64>*
- %r = load <8 x i64>, <8 x i64>* %vaddr, align 64
- %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
- ret <8 x i64>%res
-}
-
-define <8 x i64> @mov_test39(i8 * %addr, <8 x i64> %mask1) {
-; GENERIC-LABEL: mov_test39:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test39:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovdqu64 (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <8 x i64> %mask1, zeroinitializer
- %vaddr = bitcast i8* %addr to <8 x i64>*
- %r = load <8 x i64>, <8 x i64>* %vaddr, align 1
- %res = select <8 x i1> %mask, <8 x i64> %r, <8 x i64> zeroinitializer
- ret <8 x i64>%res
-}
-
-define <16 x float> @mov_test40(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
-; GENERIC-LABEL: mov_test40:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [0:0.25]
-; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test40:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; SKX-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovaps (%rdi), %zmm0 {%k1} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = fcmp one <16 x float> %mask1, zeroinitializer
- %vaddr = bitcast i8* %addr to <16 x float>*
- %r = load <16 x float>, <16 x float>* %vaddr, align 64
- %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
- ret <16 x float>%res
-}
-
-define <16 x float> @mov_test41(i8 * %addr, <16 x float> %old, <16 x float> %mask1) {
-; GENERIC-LABEL: mov_test41:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [0:0.25]
-; GENERIC-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test41:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; SKX-NEXT: vcmpneq_oqps %zmm2, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovups (%rdi), %zmm0 {%k1} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = fcmp one <16 x float> %mask1, zeroinitializer
- %vaddr = bitcast i8* %addr to <16 x float>*
- %r = load <16 x float>, <16 x float>* %vaddr, align 1
- %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> %old
- ret <16 x float>%res
-}
-
-define <16 x float> @mov_test42(i8 * %addr, <16 x float> %mask1) {
-; GENERIC-LABEL: mov_test42:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test42:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = fcmp one <16 x float> %mask1, zeroinitializer
- %vaddr = bitcast i8* %addr to <16 x float>*
- %r = load <16 x float>, <16 x float>* %vaddr, align 64
- %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
- ret <16 x float>%res
-}
-
-define <16 x float> @mov_test43(i8 * %addr, <16 x float> %mask1) {
-; GENERIC-LABEL: mov_test43:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test43:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpneq_oqps %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovups (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = fcmp one <16 x float> %mask1, zeroinitializer
- %vaddr = bitcast i8* %addr to <16 x float>*
- %r = load <16 x float>, <16 x float>* %vaddr, align 1
- %res = select <16 x i1> %mask, <16 x float> %r, <16 x float> zeroinitializer
- ret <16 x float>%res
-}
-
-define <8 x double> @mov_test44(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
-; GENERIC-LABEL: mov_test44:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [0:0.25]
-; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test44:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; SKX-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovapd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = fcmp one <8 x double> %mask1, zeroinitializer
- %vaddr = bitcast i8* %addr to <8 x double>*
- %r = load <8 x double>, <8 x double>* %vaddr, align 64
- %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
- ret <8 x double>%res
-}
-
-define <8 x double> @mov_test45(i8 * %addr, <8 x double> %old, <8 x double> %mask1) {
-; GENERIC-LABEL: mov_test45:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [0:0.25]
-; GENERIC-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test45:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
-; SKX-NEXT: vcmpneq_oqpd %zmm2, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovupd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = fcmp one <8 x double> %mask1, zeroinitializer
- %vaddr = bitcast i8* %addr to <8 x double>*
- %r = load <8 x double>, <8 x double>* %vaddr, align 1
- %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> %old
- ret <8 x double>%res
-}
-
-define <8 x double> @mov_test46(i8 * %addr, <8 x double> %mask1) {
-; GENERIC-LABEL: mov_test46:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test46:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = fcmp one <8 x double> %mask1, zeroinitializer
- %vaddr = bitcast i8* %addr to <8 x double>*
- %r = load <8 x double>, <8 x double>* %vaddr, align 64
- %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
- ret <8 x double>%res
-}
-
-define <8 x double> @mov_test47(i8 * %addr, <8 x double> %mask1) {
-; GENERIC-LABEL: mov_test47:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mov_test47:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovupd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = fcmp one <8 x double> %mask1, zeroinitializer
- %vaddr = bitcast i8* %addr to <8 x double>*
- %r = load <8 x double>, <8 x double>* %vaddr, align 1
- %res = select <8 x i1> %mask, <8 x double> %r, <8 x double> zeroinitializer
- ret <8 x double>%res
-}
-
-define i16 @mask16(i16 %x) {
-; GENERIC-LABEL: mask16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: notl %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mask16:
-; SKX: # %bb.0:
-; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKX-NEXT: notl %eax # sched: [1:0.25]
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
- %m0 = bitcast i16 %x to <16 x i1>
- %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
- %ret = bitcast <16 x i1> %m1 to i16
- ret i16 %ret
-}
-
-define i32 @mask16_zext(i16 %x) {
-; GENERIC-LABEL: mask16_zext:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: notl %edi # sched: [1:0.33]
-; GENERIC-NEXT: movzwl %di, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mask16_zext:
-; SKX: # %bb.0:
-; SKX-NEXT: notl %edi # sched: [1:0.25]
-; SKX-NEXT: movzwl %di, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %m0 = bitcast i16 %x to <16 x i1>
- %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
- %m2 = bitcast <16 x i1> %m1 to i16
- %ret = zext i16 %m2 to i32
- ret i32 %ret
-}
-
-define i8 @mask8(i8 %x) {
-; GENERIC-LABEL: mask8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: notb %al # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $al killed $al killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mask8:
-; SKX: # %bb.0:
-; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKX-NEXT: notb %al # sched: [1:0.25]
-; SKX-NEXT: # kill: def $al killed $al killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
- %m0 = bitcast i8 %x to <8 x i1>
- %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
- %ret = bitcast <8 x i1> %m1 to i8
- ret i8 %ret
-}
-
-define i32 @mask8_zext(i8 %x) {
-; GENERIC-LABEL: mask8_zext:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: notb %dil # sched: [1:0.33]
-; GENERIC-NEXT: movzbl %dil, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mask8_zext:
-; SKX: # %bb.0:
-; SKX-NEXT: notb %dil # sched: [1:0.25]
-; SKX-NEXT: movzbl %dil, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %m0 = bitcast i8 %x to <8 x i1>
- %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
- %m2 = bitcast <8 x i1> %m1 to i8
- %ret = zext i8 %m2 to i32
- ret i32 %ret
-}
-
-define void @mask16_mem(i16* %ptr) {
-; GENERIC-LABEL: mask16_mem:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50]
-; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mask16_mem:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = load i16, i16* %ptr, align 4
- %m0 = bitcast i16 %x to <16 x i1>
- %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
- %ret = bitcast <16 x i1> %m1 to i16
- store i16 %ret, i16* %ptr, align 4
- ret void
-}
-
-define void @mask8_mem(i8* %ptr) {
-; GENERIC-LABEL: mask8_mem:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50]
-; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mask8_mem:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT: knotb %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = load i8, i8* %ptr, align 4
- %m0 = bitcast i8 %x to <8 x i1>
- %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
- %ret = bitcast <8 x i1> %m1 to i8
- store i8 %ret, i8* %ptr, align 4
- ret void
-}
-
-define i16 @mand16(i16 %x, i16 %y) {
-; GENERIC-LABEL: mand16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: andl %esi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: xorl %esi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mand16:
-; SKX: # %bb.0:
-; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25]
-; SKX-NEXT: andl %esi, %ecx # sched: [1:0.25]
-; SKX-NEXT: xorl %esi, %eax # sched: [1:0.25]
-; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
- %ma = bitcast i16 %x to <16 x i1>
- %mb = bitcast i16 %y to <16 x i1>
- %mc = and <16 x i1> %ma, %mb
- %md = xor <16 x i1> %ma, %mb
- %me = or <16 x i1> %mc, %md
- %ret = bitcast <16 x i1> %me to i16
- ret i16 %ret
-}
-
-define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
-; GENERIC-LABEL: mand16_mem:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50]
-; GENERIC-NEXT: kmovw (%rsi), %k1 # sched: [5:0.50]
-; GENERIC-NEXT: kandw %k1, %k0, %k2 # sched: [1:0.33]
-; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: korw %k0, %k2, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: mand16_mem:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT: kmovw (%rsi), %k1 # sched: [7:1.00]
-; SKX-NEXT: kandw %k1, %k0, %k2 # sched: [1:1.00]
-; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: korw %k0, %k2, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
- %ma = load <16 x i1>, <16 x i1>* %x
- %mb = load <16 x i1>, <16 x i1>* %y
- %mc = and <16 x i1> %ma, %mb
- %md = xor <16 x i1> %ma, %mb
- %me = or <16 x i1> %mc, %md
- %ret = bitcast <16 x i1> %me to i16
- ret i16 %ret
-}
-
-define i8 @shuf_test1(i16 %v) nounwind {
-; GENERIC-LABEL: shuf_test1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kshiftrw $8, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $al killed $al killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: shuf_test1:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
-; SKX-NEXT: kshiftrw $8, %k0, %k0 # sched: [3:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: # kill: def $al killed $al killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
- %v1 = bitcast i16 %v to <16 x i1>
- %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %mask1 = bitcast <8 x i1> %mask to i8
- ret i8 %mask1
-}
-
-define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
-; GENERIC-LABEL: zext_test1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50]
-; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_test1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: andl $1, %eax # sched: [1:0.25]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %cmp_res = icmp ugt <16 x i32> %a, %b
- %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
- %res = zext i1 %cmp_res.i1 to i32
- ret i32 %res
-}
-
-define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
-; GENERIC-LABEL: zext_test2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50]
-; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: andl $1, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_test2:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: andl $1, %eax # sched: [1:0.25]
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %cmp_res = icmp ugt <16 x i32> %a, %b
- %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
- %res = zext i1 %cmp_res.i1 to i16
- ret i16 %res
-}
-
-define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
-; GENERIC-LABEL: zext_test3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [1:0.50]
-; GENERIC-NEXT: kshiftrw $5, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: andb $1, %al # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $al killed $al killed $eax
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: zext_test3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: kshiftrw $5, %k0, %k0 # sched: [3:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: andb $1, %al # sched: [1:0.25]
-; SKX-NEXT: # kill: def $al killed $al killed $eax
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %cmp_res = icmp ugt <16 x i32> %a, %b
- %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
- %res = zext i1 %cmp_res.i1 to i8
- ret i8 %res
-}
-
-define i8 @conv1(<8 x i1>* %R) {
-; GENERIC-LABEL: conv1:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movb $-1, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; GENERIC-NEXT: movb $-2, %al # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: conv1:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: movb $-1, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; SKX-NEXT: movb $-2, %al # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R
-
- %maskPtr = alloca <8 x i1>
- store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
- %mask = load <8 x i1>, <8 x i1>* %maskPtr
- %mask_convert = bitcast <8 x i1> %mask to i8
- ret i8 %mask_convert
-}
-
-define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
-; GENERIC-LABEL: test4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [1:0.50]
-; GENERIC-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [1:0.50]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test4:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpleq %ymm1, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpcmpgtq %ymm3, %ymm2, %k0 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x_gt_y = icmp sgt <4 x i64> %x, %y
- %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
- %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
- %resse = sext <4 x i1>%res to <4 x i32>
- ret <4 x i32> %resse
-}
-
-define <2 x i64> @vcmp_test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
-; GENERIC-LABEL: vcmp_test5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [1:0.50]
-; GENERIC-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [1:0.50]
-; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vcmp_test5:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpleq %xmm3, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpcmpgtq %xmm0, %xmm1, %k0 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x_gt_y = icmp slt <2 x i64> %x, %y
- %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
- %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
- %resse = sext <2 x i1>%res to <2 x i64>
- ret <2 x i64> %resse
-}define void @vcmp_test6(<16 x i1> %mask) {
-allocas:
- %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
- %b = bitcast <16 x i1> %a to i16
- %c = icmp eq i16 %b, 0
- br i1 %c, label %true, label %false
-
-true:
- ret void
-
-false:
- ret void
-}
-define void @vcmp_test7(<8 x i1> %mask) {
-; GENERIC-LABEL: vcmp_test7:
-; GENERIC: # %bb.0: # %allocas
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: orb $85, %al # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vcmp_test7:
-; SKX: # %bb.0: # %allocas
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: orb $85, %al # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-allocas:
- %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
- %b = bitcast <8 x i1> %a to i8
- %c = icmp eq i8 %b, 0
- br i1 %c, label %true, label %false
-
-true:
- ret void
-
-false:
- ret void
-}
-define <16 x i8> @vcmp_test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
-; GENERIC-LABEL: vcmp_test8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33]
-; GENERIC-NEXT: jg .LBB386_1 # sched: [1:1.00]
-; GENERIC-NEXT: # %bb.2:
-; GENERIC-NEXT: kxorw %k0, %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-; GENERIC-NEXT: .LBB386_1:
-; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [0:0.25]
-; GENERIC-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [1:0.50]
-; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vcmp_test8:
-; SKX: # %bb.0:
-; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25]
-; SKX-NEXT: jg .LBB386_1 # sched: [1:0.50]
-; SKX-NEXT: # %bb.2:
-; SKX-NEXT: kxorw %k0, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-; SKX-NEXT: .LBB386_1:
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %cond = icmp sgt i32 %a1, %b1
- %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
- %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
- %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
- %res = sext <16 x i1> %mix to <16 x i8>
- ret <16 x i8> %res
-}
-define <16 x i1> @vpmov_test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
-; GENERIC-LABEL: vpmov_test9:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33]
-; GENERIC-NEXT: jg .LBB387_1 # sched: [1:1.00]
-; GENERIC-NEXT: # %bb.2:
-; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: jmp .LBB387_3 # sched: [1:1.00]
-; GENERIC-NEXT: .LBB387_1:
-; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: .LBB387_3:
-; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vpmov_test9:
-; SKX: # %bb.0:
-; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25]
-; SKX-NEXT: jg .LBB387_1 # sched: [1:0.50]
-; SKX-NEXT: # %bb.2:
-; SKX-NEXT: vpsllw $7, %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: jmp .LBB387_3 # sched: [1:0.50]
-; SKX-NEXT: .LBB387_1:
-; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: .LBB387_3:
-; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp sgt i32 %a1, %b1
- %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
- ret <16 x i1>%c
-}define <8 x i1> @vpmov_test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
- %mask = icmp sgt i32 %a1, %b1
- %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
- ret <8 x i1>%c
-}
-
-define <4 x i1> @vmov_test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
-; GENERIC-LABEL: vmov_test11:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33]
-; GENERIC-NEXT: jg .LBB389_1 # sched: [1:1.00]
-; GENERIC-NEXT: # %bb.2:
-; GENERIC-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: jmp .LBB389_3 # sched: [1:1.00]
-; GENERIC-NEXT: .LBB389_1:
-; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: .LBB389_3:
-; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vmov_test11:
-; SKX: # %bb.0:
-; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25]
-; SKX-NEXT: jg .LBB389_1 # sched: [1:0.50]
-; SKX-NEXT: # %bb.2:
-; SKX-NEXT: vpslld $31, %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: jmp .LBB389_3 # sched: [1:0.50]
-; SKX-NEXT: .LBB389_1:
-; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: .LBB389_3:
-; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp sgt i32 %a1, %b1
- %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
- ret <4 x i1>%c
-}
-
-define i32 @vmov_test12(i32 %x, i32 %y) {
-; GENERIC-LABEL: vmov_test12:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vmov_test12:
-; SKX: # %bb.0:
-; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = bitcast i16 21845 to <16 x i1>
- %b = extractelement <16 x i1> %a, i32 0
- %c = select i1 %b, i32 %x, i32 %y
- ret i32 %c
-}
-
-define i32 @vmov_test13(i32 %x, i32 %y) {
-; GENERIC-LABEL: vmov_test13:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vmov_test13:
-; SKX: # %bb.0:
-; SKX-NEXT: movl %esi, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = bitcast i16 21845 to <16 x i1>
- %b = extractelement <16 x i1> %a, i32 3
- %c = select i1 %b, i32 %x, i32 %y
- ret i32 %c
-}define <4 x i1> @vmov_test14() {
- %a = bitcast i16 21845 to <16 x i1>
- %b = extractelement <16 x i1> %a, i32 2
- %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
- ret <4 x i1> %c
-}
-
-define <16 x i1> @vmov_test15(i32 %x, i32 %y) {
-; GENERIC-LABEL: vmov_test15:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33]
-; GENERIC-NEXT: movl $21845, %eax # imm = 0x5555
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: movl $1, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: cmovgl %eax, %ecx # sched: [2:0.67]
-; GENERIC-NEXT: kmovd %ecx, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vmov_test15:
-; SKX: # %bb.0:
-; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25]
-; SKX-NEXT: movl $21845, %eax # imm = 0x5555
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: movl $1, %ecx # sched: [1:0.25]
-; SKX-NEXT: cmovgl %eax, %ecx # sched: [1:0.50]
-; SKX-NEXT: kmovd %ecx, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = bitcast i16 21845 to <16 x i1>
- %b = bitcast i16 1 to <16 x i1>
- %mask = icmp sgt i32 %x, %y
- %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
- ret <16 x i1> %c
-}
-
-define <64 x i8> @vmov_test16(i64 %x) {
-;
-; GENERIC-LABEL: vmov_test16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: movb $1, %al # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00]
-; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vmov_test16:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovq %rdi, %k0 # sched: [1:1.00]
-; SKX-NEXT: movb $1, %al # sched: [1:0.25]
-; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
-; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00]
-; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
-; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00]
-; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00]
-; SKX-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = bitcast i64 %x to <64 x i1>
- %b = insertelement <64 x i1>%a, i1 true, i32 5
- %c = sext <64 x i1>%b to <64 x i8>
- ret <64 x i8>%c
-}
-
-define <64 x i8> @vmov_test17(i64 %x, i32 %y, i32 %z) {
-;
-; GENERIC-LABEL: vmov_test17:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovq %rdi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: cmpl %edx, %esi # sched: [1:0.33]
-; GENERIC-NEXT: setg %al # sched: [1:0.50]
-; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kshiftrq $5, %k0, %k2 # sched: [1:1.00]
-; GENERIC-NEXT: kxorq %k1, %k2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kshiftlq $63, %k1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: kshiftrq $58, %k1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: kxorq %k1, %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vmov_test17:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovq %rdi, %k0 # sched: [1:1.00]
-; SKX-NEXT: cmpl %edx, %esi # sched: [1:0.25]
-; SKX-NEXT: setg %al # sched: [1:0.50]
-; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
-; SKX-NEXT: kshiftrq $5, %k0, %k2 # sched: [3:1.00]
-; SKX-NEXT: kxorq %k1, %k2, %k1 # sched: [1:1.00]
-; SKX-NEXT: kshiftlq $63, %k1, %k1 # sched: [3:1.00]
-; SKX-NEXT: kshiftrq $58, %k1, %k1 # sched: [3:1.00]
-; SKX-NEXT: kxorq %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = bitcast i64 %x to <64 x i1>
- %b = icmp sgt i32 %y, %z
- %c = insertelement <64 x i1>%a, i1 %b, i32 5
- %d = sext <64 x i1>%c to <64 x i8>
- ret <64 x i8>%d
-}
-
-define <8 x i1> @vmov_test18(i8 %a, i16 %y) {
-; GENERIC-LABEL: vmov_test18:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kshiftrw $8, %k1, %k2 # sched: [1:1.00]
-; GENERIC-NEXT: kshiftrw $9, %k1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: kshiftrb $6, %k0, %k3 # sched: [1:1.00]
-; GENERIC-NEXT: kxorb %k1, %k3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kshiftlb $7, %k1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: kshiftrb $1, %k1, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kshiftlb $1, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kshiftrb $1, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kshiftlb $7, %k2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: korb %k1, %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vmov_test18:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
-; SKX-NEXT: kshiftrw $8, %k1, %k2 # sched: [3:1.00]
-; SKX-NEXT: kshiftrw $9, %k1, %k1 # sched: [3:1.00]
-; SKX-NEXT: kshiftrb $6, %k0, %k3 # sched: [3:1.00]
-; SKX-NEXT: kxorb %k1, %k3, %k1 # sched: [1:1.00]
-; SKX-NEXT: kshiftlb $7, %k1, %k1 # sched: [3:1.00]
-; SKX-NEXT: kshiftrb $1, %k1, %k1 # sched: [3:1.00]
-; SKX-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kshiftlb $1, %k0, %k0 # sched: [3:1.00]
-; SKX-NEXT: kshiftrb $1, %k0, %k0 # sched: [3:1.00]
-; SKX-NEXT: kshiftlb $7, %k2, %k1 # sched: [3:1.00]
-; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = bitcast i8 %a to <8 x i1>
- %b1 = bitcast i16 %y to <16 x i1>
- %el1 = extractelement <16 x i1>%b1, i32 8
- %el2 = extractelement <16 x i1>%b1, i32 9
- %c = insertelement <8 x i1>%b, i1 %el1, i32 7
- %d = insertelement <8 x i1>%c, i1 %el2, i32 6
- ret <8 x i1>%d
-}
-define <32 x i16> @vmov_test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
-; GENERIC-LABEL: vmov_test21:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vmov_test21:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %ymm1, %k1 # sched: [1:1.00]
-; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
- ret <32 x i16> %ret
-}
-
-define void @vmov_test22(<4 x i1> %a, <4 x i1>* %addr) {
-; GENERIC-LABEL: vmov_test22:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vmov_test22:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- store <4 x i1> %a, <4 x i1>* %addr
- ret void
-}
-
-define void @vmov_test23(<2 x i1> %a, <2 x i1>* %addr) {
-; GENERIC-LABEL: vmov_test23:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: vmov_test23:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- store <2 x i1> %a, <2 x i1>* %addr
- ret void
-}
-
-define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
-; GENERIC-LABEL: store_v1i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovb %k0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: store_v1i1:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
-; SKX-NEXT: kxnorw %k0, %k0, %k1 # sched: [1:1.00]
-; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovb %k0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = xor <1 x i1> %c, <i1 1>
- store <1 x i1> %x, <1 x i1>* %ptr, align 4
- ret void
-}
-
-define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
-; GENERIC-LABEL: store_v2i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: store_v2i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovq2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = xor <2 x i1> %c, <i1 1, i1 1>
- store <2 x i1> %x, <2 x i1>* %ptr, align 4
- ret void
-}
-
-define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
-; GENERIC-LABEL: store_v4i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: store_v4i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovd2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
- store <4 x i1> %x, <4 x i1>* %ptr, align 4
- ret void
-}
-
-define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
-; GENERIC-LABEL: store_v8i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: knotb %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: store_v8i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: knotb %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
- store <8 x i1> %x, <8 x i1>* %ptr, align 4
- ret void
-}
-
-define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
-; GENERIC-LABEL: store_v16i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: knotw %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: store_v16i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: knotw %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
- store <16 x i1> %x, <16 x i1>* %ptr, align 4
- ret void
-}
-
-;void f2(int);
-;void f1(int c)
-;{
-; static int v = 0;
-; if (v == 0)
-; v = 1;
-; else
-; v = 0;
-; f2(v);
-;}
-
-@f1.v = internal unnamed_addr global i1 false, align 4
-
-define void @f1(i32 %c) {
-; GENERIC-LABEL: f1:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movzbl {{.*}}(%rip), %edi # sched: [5:0.50]
-; GENERIC-NEXT: xorl $1, %edi # sched: [1:0.33]
-; GENERIC-NEXT: movb %dil, {{.*}}(%rip) # sched: [1:1.00]
-; GENERIC-NEXT: jmp f2 # TAILCALL
-;
-; SKX-LABEL: f1:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: movzbl {{.*}}(%rip), %edi # sched: [5:0.50]
-; SKX-NEXT: xorl $1, %edi # sched: [1:0.25]
-; SKX-NEXT: movb %dil, {{.*}}(%rip) # sched: [1:1.00]
-; SKX-NEXT: jmp f2 # TAILCALL
-entry:
- %.b1 = load i1, i1* @f1.v, align 4
- %not..b1 = xor i1 %.b1, true
- store i1 %not..b1, i1* @f1.v, align 4
- %0 = zext i1 %not..b1 to i32
- tail call void @f2(i32 %0) #2
- ret void
-}
-
-declare void @f2(i32) #1
-
-define void @store_i16_i1(i16 %x, i1 *%y) {
-; GENERIC-LABEL: store_i16_i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33]
-; GENERIC-NEXT: movb %dil, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: store_i16_i1:
-; SKX: # %bb.0:
-; SKX-NEXT: andl $1, %edi # sched: [1:0.25]
-; SKX-NEXT: movb %dil, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %c = trunc i16 %x to i1
- store i1 %c, i1* %y
- ret void
-}
-
-define void @store_i8_i1(i8 %x, i1 *%y) {
-; GENERIC-LABEL: store_i8_i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33]
-; GENERIC-NEXT: movb %dil, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: store_i8_i1:
-; SKX: # %bb.0:
-; SKX-NEXT: andl $1, %edi # sched: [1:0.25]
-; SKX-NEXT: movb %dil, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %c = trunc i8 %x to i1
- store i1 %c, i1* %y
- ret void
-}
-
-define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
-; GENERIC-LABEL: test_build_vec_v32i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl $1497715861, %eax # imm = 0x59455495
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %eax, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_build_vec_v32i1:
-; SKX: # %bb.0:
-; SKX-NEXT: movl $1497715861, %eax # imm = 0x59455495
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: kmovd %eax, %k1 # sched: [1:1.00]
-; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
- ret <32 x i16> %ret
-}
-
-define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
-; GENERIC-LABEL: test_build_vec_v64i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_build_vec_v64i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zero,zero,zmm0[2],zero,zero,zero,zmm0[6],zero,zmm0[8],zero,zmm0[10],zero,zmm0[12],zero,zero,zmm0[15],zero,zero,zmm0[18],zero,zmm0[20],zero,zmm0[22],zero,zmm0[24],zero,zero,zmm0[27],zero,zero,zmm0[30],zero,zmm0[32],zero,zmm0[34],zero,zero,zero,zmm0[38],zero,zmm0[40],zero,zero,zmm0[43,44],zero,zmm0[46],zero,zmm0[48],zero,zmm0[50],zero,zero,zero,zmm0[54],zero,zmm0[56],zero,zero,zmm0[59,60],zero,zmm0[62],zero sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
- ret <64 x i8> %ret
-}
-
-define void @ktest_1(<8 x double> %in, double * %base) {
-; GENERIC-LABEL: ktest_1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovupd (%rdi), %zmm1 # sched: [7:0.50]
-; GENERIC-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [7:0.50]
-; GENERIC-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
-; GENERIC-NEXT: kortestb %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: je .LBB410_2 # sched: [1:1.00]
-; GENERIC-NEXT: # %bb.1: # %L1
-; GENERIC-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-; GENERIC-NEXT: .LBB410_2: # %L2
-; GENERIC-NEXT: vmovapd %zmm0, 8(%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ktest_1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovupd (%rdi), %zmm1 # sched: [8:0.50]
-; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovupd 8(%rdi), %zmm1 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: vcmpltpd %zmm1, %zmm0, %k0 {%k1} # sched: [3:1.00]
-; SKX-NEXT: kortestb %k0, %k0 # sched: [3:1.00]
-; SKX-NEXT: je .LBB410_2 # sched: [1:0.50]
-; SKX-NEXT: # %bb.1: # %L1
-; SKX-NEXT: vmovapd %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-; SKX-NEXT: .LBB410_2: # %L2
-; SKX-NEXT: vmovapd %zmm0, 8(%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %addr1 = getelementptr double, double * %base, i64 0
- %addr2 = getelementptr double, double * %base, i64 1
-
- %vaddr1 = bitcast double* %addr1 to <8 x double>*
- %vaddr2 = bitcast double* %addr2 to <8 x double>*
-
- %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1
- %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1
-
- %sel1 = fcmp ogt <8 x double>%in, %val1
- %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer
- %sel2 = fcmp olt <8 x double> %in, %val3
- %sel3 = and <8 x i1> %sel1, %sel2
-
- %int_sel3 = bitcast <8 x i1> %sel3 to i8
- %res = icmp eq i8 %int_sel3, zeroinitializer
- br i1 %res, label %L2, label %L1
-L1:
- store <8 x double> %in, <8 x double>* %vaddr1
- br label %End
-L2:
- store <8 x double> %in, <8 x double>* %vaddr2
- br label %End
-End:
- ret void
-}
-
-define void @ktest_2(<32 x float> %in, float * %base) {
-;
-; GENERIC-LABEL: ktest_2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovups (%rdi), %zmm2 # sched: [7:0.50]
-; GENERIC-NEXT: vmovups 64(%rdi), %zmm3 # sched: [7:0.50]
-; GENERIC-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00]
-; GENERIC-NEXT: kunpckwd %k1, %k2, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [7:0.50]
-; GENERIC-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [7:0.50]
-; GENERIC-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
-; GENERIC-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
-; GENERIC-NEXT: kunpckwd %k1, %k2, %k1 # sched: [1:1.00]
-; GENERIC-NEXT: kortestd %k1, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: je .LBB411_2 # sched: [1:1.00]
-; GENERIC-NEXT: # %bb.1: # %L1
-; GENERIC-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, 64(%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-; GENERIC-NEXT: .LBB411_2: # %L2
-; GENERIC-NEXT: vmovaps %zmm0, 4(%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, 68(%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: ktest_2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovups (%rdi), %zmm2 # sched: [8:0.50]
-; SKX-NEXT: vmovups 64(%rdi), %zmm3 # sched: [8:0.50]
-; SKX-NEXT: vcmpltps %zmm0, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vcmpltps %zmm1, %zmm3, %k2 # sched: [3:1.00]
-; SKX-NEXT: kunpckwd %k1, %k2, %k0 # sched: [3:1.00]
-; SKX-NEXT: vmovups 68(%rdi), %zmm2 {%k2} {z} # sched: [8:0.50]
-; SKX-NEXT: vmovups 4(%rdi), %zmm3 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: vcmpltps %zmm3, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vcmpltps %zmm2, %zmm1, %k2 # sched: [3:1.00]
-; SKX-NEXT: kunpckwd %k1, %k2, %k1 # sched: [3:1.00]
-; SKX-NEXT: kortestd %k1, %k0 # sched: [3:1.00]
-; SKX-NEXT: je .LBB411_2 # sched: [1:0.50]
-; SKX-NEXT: # %bb.1: # %L1
-; SKX-NEXT: vmovaps %zmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vmovaps %zmm1, 64(%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-; SKX-NEXT: .LBB411_2: # %L2
-; SKX-NEXT: vmovaps %zmm0, 4(%rdi) # sched: [1:1.00]
-; SKX-NEXT: vmovaps %zmm1, 68(%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %addr1 = getelementptr float, float * %base, i64 0
- %addr2 = getelementptr float, float * %base, i64 1
-
- %vaddr1 = bitcast float* %addr1 to <32 x float>*
- %vaddr2 = bitcast float* %addr2 to <32 x float>*
-
- %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1
- %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1
-
- %sel1 = fcmp ogt <32 x float>%in, %val1
- %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer
- %sel2 = fcmp olt <32 x float> %in, %val3
- %sel3 = or <32 x i1> %sel1, %sel2
-
- %int_sel3 = bitcast <32 x i1> %sel3 to i32
- %res = icmp eq i32 %int_sel3, zeroinitializer
- br i1 %res, label %L2, label %L1
-L1:
- store <32 x float> %in, <32 x float>* %vaddr1
- br label %End
-L2:
- store <32 x float> %in, <32 x float>* %vaddr2
- br label %End
-End:
- ret void
-}
-
-define <8 x i64> @load_8i1(<8 x i1>* %a) {
-; GENERIC-LABEL: load_8i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50]
-; GENERIC-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: load_8i1:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = load <8 x i1>, <8 x i1>* %a
- %c = sext <8 x i1> %b to <8 x i64>
- ret <8 x i64> %c
-}
-
-define <16 x i32> @load_16i1(<16 x i1>* %a) {
-; GENERIC-LABEL: load_16i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovw (%rdi), %k0 # sched: [5:0.50]
-; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: load_16i1:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovw (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = load <16 x i1>, <16 x i1>* %a
- %c = sext <16 x i1> %b to <16 x i32>
- ret <16 x i32> %c
-}
-
-define <2 x i16> @load_2i1(<2 x i1>* %a) {
-; GENERIC-LABEL: load_2i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50]
-; GENERIC-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: load_2i1:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = load <2 x i1>, <2 x i1>* %a
- %c = sext <2 x i1> %b to <2 x i16>
- ret <2 x i16> %c
-}
-
-define <4 x i16> @load_4i1(<4 x i1>* %a) {
-; GENERIC-LABEL: load_4i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovb (%rdi), %k0 # sched: [5:0.50]
-; GENERIC-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: load_4i1:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovb (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = load <4 x i1>, <4 x i1>* %a
- %c = sext <4 x i1> %b to <4 x i16>
- ret <4 x i16> %c
-}
-
-define <32 x i16> @load_32i1(<32 x i1>* %a) {
-; GENERIC-LABEL: load_32i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd (%rdi), %k0 # sched: [5:0.50]
-; GENERIC-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: load_32i1:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = load <32 x i1>, <32 x i1>* %a
- %c = sext <32 x i1> %b to <32 x i16>
- ret <32 x i16> %c
-}
-
-define <64 x i8> @load_64i1(<64 x i1>* %a) {
-; GENERIC-LABEL: load_64i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovq (%rdi), %k0 # sched: [5:0.50]
-; GENERIC-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: load_64i1:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovq (%rdi), %k0 # sched: [7:1.00]
-; SKX-NEXT: vpmovm2b %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = load <64 x i1>, <64 x i1>* %a
- %c = sext <64 x i1> %b to <64 x i8>
- ret <64 x i8> %c
-}
-
-define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
-; GENERIC-LABEL: store_8i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: store_8i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- store <8 x i1> %v, <8 x i1>* %a
- ret void
-}
-
-define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
-; GENERIC-LABEL: store_8i1_1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: store_8i1_1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovw2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovb %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %v1 = trunc <8 x i16> %v to <8 x i1>
- store <8 x i1> %v1, <8 x i1>* %a
- ret void
-}
-
-define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
-; GENERIC-LABEL: store_16i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: store_16i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %xmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovw %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- store <16 x i1> %v, <16 x i1>* %a
- ret void
-}
-
-define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
-; GENERIC-LABEL: store_32i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %ymm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: store_32i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:0.50]
-; SKX-NEXT: vpmovb2m %ymm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- store <32 x i1> %v, <32 x i1>* %a
- ret void
-}
-
-define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
-; GENERIC-LABEL: store_32i1_1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: store_32i1_1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00]
-; SKX-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %v1 = trunc <32 x i16> %v to <32 x i1>
- store <32 x i1> %v1, <32 x i1>* %a
- ret void
-}
-
-
-define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
-;
-; GENERIC-LABEL: store_64i1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: store_64i1:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00]
-; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- store <64 x i1> %v, <64 x i1>* %a
- ret void
-}
-
-define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
-; GENERIC-LABEL: test_bitcast_v8i1_zext:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovb %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_bitcast_v8i1_zext:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: kmovb %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: addl %eax, %eax # sched: [1:0.25]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %v1 = icmp eq <16 x i32> %a, zeroinitializer
- %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %mask1 = bitcast <8 x i1> %mask to i8
- %val = zext i8 %mask1 to i32
- %val1 = add i32 %val, %val
- ret i32 %val1
-}
-
-define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
-; GENERIC-LABEL: test_bitcast_v16i1_zext:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovw %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: addl %eax, %eax # sched: [1:0.33]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_bitcast_v16i1_zext:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: kmovw %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: addl %eax, %eax # sched: [1:0.25]
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
- %v1 = icmp eq <16 x i32> %a, zeroinitializer
- %mask1 = bitcast <16 x i1> %v1 to i16
- %val = zext i16 %mask1 to i32
- %val1 = add i32 %val, %val
- ret i32 %val1
-}
-
-define i16 @test_v16i1_add(i16 %x, i16 %y) {
-; GENERIC-LABEL: test_v16i1_add:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_v16i1_add:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
-; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
- %m0 = bitcast i16 %x to <16 x i1>
- %m1 = bitcast i16 %y to <16 x i1>
- %m2 = add <16 x i1> %m0, %m1
- %ret = bitcast <16 x i1> %m2 to i16
- ret i16 %ret
-}
-
-define i16 @test_v16i1_sub(i16 %x, i16 %y) {
-; GENERIC-LABEL: test_v16i1_sub:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kxorw %k1, %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_v16i1_sub:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
-; SKX-NEXT: kxorw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
- %m0 = bitcast i16 %x to <16 x i1>
- %m1 = bitcast i16 %y to <16 x i1>
- %m2 = sub <16 x i1> %m0, %m1
- %ret = bitcast <16 x i1> %m2 to i16
- ret i16 %ret
-}
-
-define i16 @test_v16i1_mul(i16 %x, i16 %y) {
-; GENERIC-LABEL: test_v16i1_mul:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kandw %k1, %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_v16i1_mul:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
-; SKX-NEXT: kandw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
- %m0 = bitcast i16 %x to <16 x i1>
- %m1 = bitcast i16 %y to <16 x i1>
- %m2 = mul <16 x i1> %m0, %m1
- %ret = bitcast <16 x i1> %m2 to i16
- ret i16 %ret
-}
-
-define i8 @test_v8i1_add(i8 %x, i8 %y) {
-; GENERIC-LABEL: test_v8i1_add:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $al killed $al killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_v8i1_add:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
-; SKX-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: # kill: def $al killed $al killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
- %m0 = bitcast i8 %x to <8 x i1>
- %m1 = bitcast i8 %y to <8 x i1>
- %m2 = add <8 x i1> %m0, %m1
- %ret = bitcast <8 x i1> %m2 to i8
- ret i8 %ret
-}
-
-define i8 @test_v8i1_sub(i8 %x, i8 %y) {
-; GENERIC-LABEL: test_v8i1_sub:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kxorb %k1, %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $al killed $al killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_v8i1_sub:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
-; SKX-NEXT: kxorb %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: # kill: def $al killed $al killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
- %m0 = bitcast i8 %x to <8 x i1>
- %m1 = bitcast i8 %y to <8 x i1>
- %m2 = sub <8 x i1> %m0, %m1
- %ret = bitcast <8 x i1> %m2 to i8
- ret i8 %ret
-}
-
-define i8 @test_v8i1_mul(i8 %x, i8 %y) {
-; GENERIC-LABEL: test_v8i1_mul:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovd %edi, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %esi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: kandb %k1, %k0, %k0 # sched: [1:0.33]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $al killed $al killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_v8i1_mul:
-; SKX: # %bb.0:
-; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
-; SKX-NEXT: kandb %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: # kill: def $al killed $al killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
- %m0 = bitcast i8 %x to <8 x i1>
- %m1 = bitcast i8 %y to <8 x i1>
- %m2 = mul <8 x i1> %m0, %m1
- %ret = bitcast <8 x i1> %m2 to i8
- ret i8 %ret
-}
-
-define <16 x i32> @_inreg16xi32(i32 %a) {
-; GENERIC-LABEL: _inreg16xi32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastd %edi, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _inreg16xi32:
-; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastd %edi, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = insertelement <16 x i32> undef, i32 %a, i32 0
- %c = shufflevector <16 x i32> %b, <16 x i32> undef, <16 x i32> zeroinitializer
- ret <16 x i32> %c
-}
-
-define <8 x i64> @_inreg8xi64(i64 %a) {
-; GENERIC-LABEL: _inreg8xi64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _inreg8xi64:
-; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = insertelement <8 x i64> undef, i64 %a, i32 0
- %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
- ret <8 x i64> %c
-}
-
-define <16 x float> @_ss16xfloat_v4(<4 x float> %a) {
-; GENERIC-LABEL: _ss16xfloat_v4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _ss16xfloat_v4:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> zeroinitializer
- ret <16 x float> %b
-}
-
-define <16 x float> @_inreg16xfloat(float %a) {
-; GENERIC-LABEL: _inreg16xfloat:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _inreg16xfloat:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = insertelement <16 x float> undef, float %a, i32 0
- %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
- ret <16 x float> %c
-}
-
-define <16 x float> @_ss16xfloat_mask(float %a, <16 x float> %i, <16 x i32> %mask1) {
-; GENERIC-LABEL: _ss16xfloat_mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _ss16xfloat_mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vbroadcastss %xmm0, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %b = insertelement <16 x float> undef, float %a, i32 0
- %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
- %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
- ret <16 x float> %r
-}
-
-define <16 x float> @_ss16xfloat_maskz(float %a, <16 x i32> %mask1) {
-; GENERIC-LABEL: _ss16xfloat_maskz:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _ss16xfloat_maskz:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %b = insertelement <16 x float> undef, float %a, i32 0
- %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
- %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
- ret <16 x float> %r
-}
-
-define <16 x float> @_ss16xfloat_load(float* %a.ptr) {
-; GENERIC-LABEL: _ss16xfloat_load:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _ss16xfloat_load:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss (%rdi), %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load float, float* %a.ptr
- %b = insertelement <16 x float> undef, float %a, i32 0
- %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
- ret <16 x float> %c
-}
-
-define <16 x float> @_ss16xfloat_mask_load(float* %a.ptr, <16 x float> %i, <16 x i32> %mask1) {
-; GENERIC-LABEL: _ss16xfloat_mask_load:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _ss16xfloat_mask_load:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load float, float* %a.ptr
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %b = insertelement <16 x float> undef, float %a, i32 0
- %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
- %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> %i
- ret <16 x float> %r
-}
-
-define <16 x float> @_ss16xfloat_maskz_load(float* %a.ptr, <16 x i32> %mask1) {
-; GENERIC-LABEL: _ss16xfloat_maskz_load:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _ss16xfloat_maskz_load:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load float, float* %a.ptr
- %mask = icmp ne <16 x i32> %mask1, zeroinitializer
- %b = insertelement <16 x float> undef, float %a, i32 0
- %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
- %r = select <16 x i1> %mask, <16 x float> %c, <16 x float> zeroinitializer
- ret <16 x float> %r
-}
-
-define <8 x double> @_inreg8xdouble(double %a) {
-; GENERIC-LABEL: _inreg8xdouble:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _inreg8xdouble:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = insertelement <8 x double> undef, double %a, i32 0
- %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
- ret <8 x double> %c
-}
-
-define <8 x double> @_sd8xdouble_mask(double %a, <8 x double> %i, <8 x i32> %mask1) {
-; GENERIC-LABEL: _sd8xdouble_mask:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _sd8xdouble_mask:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <8 x i32> %mask1, zeroinitializer
- %b = insertelement <8 x double> undef, double %a, i32 0
- %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
- %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
- ret <8 x double> %r
-}
-
-define <8 x double> @_sd8xdouble_maskz(double %a, <8 x i32> %mask1) {
-; GENERIC-LABEL: _sd8xdouble_maskz:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _sd8xdouble_maskz:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %mask = icmp ne <8 x i32> %mask1, zeroinitializer
- %b = insertelement <8 x double> undef, double %a, i32 0
- %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
- %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
- ret <8 x double> %r
-}
-
-define <8 x double> @_sd8xdouble_load(double* %a.ptr) {
-; GENERIC-LABEL: _sd8xdouble_load:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _sd8xdouble_load:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load double, double* %a.ptr
- %b = insertelement <8 x double> undef, double %a, i32 0
- %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
- ret <8 x double> %c
-}
-
-define <8 x double> @_sd8xdouble_mask_load(double* %a.ptr, <8 x double> %i, <8 x i32> %mask1) {
-; GENERIC-LABEL: _sd8xdouble_mask_load:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _sd8xdouble_mask_load:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load double, double* %a.ptr
- %mask = icmp ne <8 x i32> %mask1, zeroinitializer
- %b = insertelement <8 x double> undef, double %a, i32 0
- %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
- %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> %i
- ret <8 x double> %r
-}
-
-define <8 x double> @_sd8xdouble_maskz_load(double* %a.ptr, <8 x i32> %mask1) {
-; GENERIC-LABEL: _sd8xdouble_maskz_load:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _sd8xdouble_maskz_load:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = load double, double* %a.ptr
- %mask = icmp ne <8 x i32> %mask1, zeroinitializer
- %b = insertelement <8 x double> undef, double %a, i32 0
- %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
- %r = select <8 x i1> %mask, <8 x double> %c, <8 x double> zeroinitializer
- ret <8 x double> %r
-}
-
-define <16 x i32> @_xmm16xi32(<16 x i32> %a) {
-; GENERIC-LABEL: _xmm16xi32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _xmm16xi32:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = shufflevector <16 x i32> %a, <16 x i32> undef, <16 x i32> zeroinitializer
- ret <16 x i32> %b
-}
-
-define <16 x float> @_xmm16xfloat(<16 x float> %a) {
-; GENERIC-LABEL: _xmm16xfloat:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _xmm16xfloat:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %b = shufflevector <16 x float> %a, <16 x float> undef, <16 x i32> zeroinitializer
- ret <16 x float> %b
-}
-
-define <16 x i32> @test_vbroadcast() {
-; GENERIC-LABEL: test_vbroadcast:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [0:0.25]
-; GENERIC-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
-; GENERIC-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.33]
-; GENERIC-NEXT: knotw %k0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_vbroadcast:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vcmpunordps %zmm0, %zmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25]
-; SKX-NEXT: knotw %k0, %k1 # sched: [1:1.00]
-; SKX-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = sext <16 x i1> zeroinitializer to <16 x i32>
- %1 = fcmp uno <16 x float> undef, zeroinitializer
- %2 = sext <16 x i1> %1 to <16 x i32>
- %3 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> %2
- ret <16 x i32> %3
-}
-
-; We implement the set1 intrinsics with vector initializers. Verify that the
-; IR generated will produce broadcasts at the end.
-define <8 x double> @test_set1_pd(double %d) #2 {
-; GENERIC-LABEL: test_set1_pd:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_set1_pd:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %vecinit.i = insertelement <8 x double> undef, double %d, i32 0
- %vecinit1.i = insertelement <8 x double> %vecinit.i, double %d, i32 1
- %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %d, i32 2
- %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %d, i32 3
- %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %d, i32 4
- %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %d, i32 5
- %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %d, i32 6
- %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %d, i32 7
- ret <8 x double> %vecinit7.i
-}
-
-define <8 x i64> @test_set1_epi64(i64 %d) #2 {
-; GENERIC-LABEL: test_set1_epi64:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_set1_epi64:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vpbroadcastq %rdi, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %vecinit.i = insertelement <8 x i64> undef, i64 %d, i32 0
- %vecinit1.i = insertelement <8 x i64> %vecinit.i, i64 %d, i32 1
- %vecinit2.i = insertelement <8 x i64> %vecinit1.i, i64 %d, i32 2
- %vecinit3.i = insertelement <8 x i64> %vecinit2.i, i64 %d, i32 3
- %vecinit4.i = insertelement <8 x i64> %vecinit3.i, i64 %d, i32 4
- %vecinit5.i = insertelement <8 x i64> %vecinit4.i, i64 %d, i32 5
- %vecinit6.i = insertelement <8 x i64> %vecinit5.i, i64 %d, i32 6
- %vecinit7.i = insertelement <8 x i64> %vecinit6.i, i64 %d, i32 7
- ret <8 x i64> %vecinit7.i
-}
-
-define <16 x float> @test_set1_ps(float %f) #2 {
-; GENERIC-LABEL: test_set1_ps:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_set1_ps:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %vecinit.i = insertelement <16 x float> undef, float %f, i32 0
- %vecinit1.i = insertelement <16 x float> %vecinit.i, float %f, i32 1
- %vecinit2.i = insertelement <16 x float> %vecinit1.i, float %f, i32 2
- %vecinit3.i = insertelement <16 x float> %vecinit2.i, float %f, i32 3
- %vecinit4.i = insertelement <16 x float> %vecinit3.i, float %f, i32 4
- %vecinit5.i = insertelement <16 x float> %vecinit4.i, float %f, i32 5
- %vecinit6.i = insertelement <16 x float> %vecinit5.i, float %f, i32 6
- %vecinit7.i = insertelement <16 x float> %vecinit6.i, float %f, i32 7
- %vecinit8.i = insertelement <16 x float> %vecinit7.i, float %f, i32 8
- %vecinit9.i = insertelement <16 x float> %vecinit8.i, float %f, i32 9
- %vecinit10.i = insertelement <16 x float> %vecinit9.i, float %f, i32 10
- %vecinit11.i = insertelement <16 x float> %vecinit10.i, float %f, i32 11
- %vecinit12.i = insertelement <16 x float> %vecinit11.i, float %f, i32 12
- %vecinit13.i = insertelement <16 x float> %vecinit12.i, float %f, i32 13
- %vecinit14.i = insertelement <16 x float> %vecinit13.i, float %f, i32 14
- %vecinit15.i = insertelement <16 x float> %vecinit14.i, float %f, i32 15
- ret <16 x float> %vecinit15.i
-}
-
-define <16 x i32> @test_set1_epi32(i32 %f) #2 {
-; GENERIC-LABEL: test_set1_epi32:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vpbroadcastd %edi, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_set1_epi32:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vpbroadcastd %edi, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %vecinit.i = insertelement <16 x i32> undef, i32 %f, i32 0
- %vecinit1.i = insertelement <16 x i32> %vecinit.i, i32 %f, i32 1
- %vecinit2.i = insertelement <16 x i32> %vecinit1.i, i32 %f, i32 2
- %vecinit3.i = insertelement <16 x i32> %vecinit2.i, i32 %f, i32 3
- %vecinit4.i = insertelement <16 x i32> %vecinit3.i, i32 %f, i32 4
- %vecinit5.i = insertelement <16 x i32> %vecinit4.i, i32 %f, i32 5
- %vecinit6.i = insertelement <16 x i32> %vecinit5.i, i32 %f, i32 6
- %vecinit7.i = insertelement <16 x i32> %vecinit6.i, i32 %f, i32 7
- %vecinit8.i = insertelement <16 x i32> %vecinit7.i, i32 %f, i32 8
- %vecinit9.i = insertelement <16 x i32> %vecinit8.i, i32 %f, i32 9
- %vecinit10.i = insertelement <16 x i32> %vecinit9.i, i32 %f, i32 10
- %vecinit11.i = insertelement <16 x i32> %vecinit10.i, i32 %f, i32 11
- %vecinit12.i = insertelement <16 x i32> %vecinit11.i, i32 %f, i32 12
- %vecinit13.i = insertelement <16 x i32> %vecinit12.i, i32 %f, i32 13
- %vecinit14.i = insertelement <16 x i32> %vecinit13.i, i32 %f, i32 14
- %vecinit15.i = insertelement <16 x i32> %vecinit14.i, i32 %f, i32 15
- ret <16 x i32> %vecinit15.i
-}
-
-; We implement the scalar broadcast intrinsics with vector initializers.
-; Verify that the IR generated will produce the broadcast at the end.
-define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a) {
-; GENERIC-LABEL: test_mm512_broadcastsd_pd:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_mm512_broadcastsd_pd:
-; SKX: # %bb.0: # %entry
-; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-entry:
- %0 = extractelement <2 x double> %a, i32 0
- %vecinit.i = insertelement <8 x double> undef, double %0, i32 0
- %vecinit1.i = insertelement <8 x double> %vecinit.i, double %0, i32 1
- %vecinit2.i = insertelement <8 x double> %vecinit1.i, double %0, i32 2
- %vecinit3.i = insertelement <8 x double> %vecinit2.i, double %0, i32 3
- %vecinit4.i = insertelement <8 x double> %vecinit3.i, double %0, i32 4
- %vecinit5.i = insertelement <8 x double> %vecinit4.i, double %0, i32 5
- %vecinit6.i = insertelement <8 x double> %vecinit5.i, double %0, i32 6
- %vecinit7.i = insertelement <8 x double> %vecinit6.i, double %0, i32 7
- ret <8 x double> %vecinit7.i
-}
-
-define <16 x float> @suff_test1(<8 x float>%a) {
-; GENERIC-LABEL: suff_test1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: suff_test1:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x float> %a, <8 x float> undef, <16 x i32> zeroinitializer
- ret <16 x float>%res
-}
-
-define <8 x double> @suff_test2(<4 x double>%a) {
-; GENERIC-LABEL: suff_test2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: suff_test2:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x double> %a, <4 x double> undef, <8 x i32> zeroinitializer
- ret <8 x double>%res
-}
-
-define <64 x i8> @_invec32xi8(<32 x i8>%a) {
-; GENERIC-LABEL: _invec32xi8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastb %xmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _invec32xi8:
-; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastb %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <32 x i8> %a, <32 x i8> undef, <64 x i32> zeroinitializer
- ret <64 x i8>%res
-}
-
-define <32 x i16> @_invec16xi16(<16 x i16>%a) {
-; GENERIC-LABEL: _invec16xi16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpbroadcastw %xmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _invec16xi16:
-; SKX: # %bb.0:
-; SKX-NEXT: vpbroadcastw %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x i16> %a, <16 x i16> undef, <32 x i32> zeroinitializer
- ret <32 x i16>%res
-}
-
-define <16 x i32> @_invec8xi32(<8 x i32>%a) {
-; GENERIC-LABEL: _invec8xi32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _invec8xi32:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> zeroinitializer
- ret <16 x i32>%res
-}
-
-define <8 x i64> @_invec4xi64(<4 x i64>%a) {
-; GENERIC-LABEL: _invec4xi64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: _invec4xi64:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastsd %xmm0, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x i64> %a, <4 x i64> undef, <8 x i32> zeroinitializer
- ret <8 x i64>%res
-}
-
-declare void @func_f32(float)
-define <16 x float> @broadcast_ss_spill(float %x) {
-; GENERIC-LABEL: broadcast_ss_spill:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33]
-; GENERIC-NEXT: .cfi_def_cfa_offset 32
-; GENERIC-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
-; GENERIC-NEXT: callq func_f32
-; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00]
-; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
-; GENERIC-NEXT: .cfi_def_cfa_offset 8
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: broadcast_ss_spill:
-; SKX: # %bb.0:
-; SKX-NEXT: subq $24, %rsp # sched: [1:0.25]
-; SKX-NEXT: .cfi_def_cfa_offset 32
-; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
-; SKX-NEXT: callq func_f32
-; SKX-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50]
-; SKX-NEXT: addq $24, %rsp # sched: [1:0.25]
-; SKX-NEXT: .cfi_def_cfa_offset 8
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = fadd float %x, %x
- call void @func_f32(float %a)
- %b = insertelement <16 x float> undef, float %a, i32 0
- %c = shufflevector <16 x float> %b, <16 x float> undef, <16 x i32> zeroinitializer
- ret <16 x float> %c
-}
-
-declare void @func_f64(double)
-define <8 x double> @broadcast_sd_spill(double %x) {
-; GENERIC-LABEL: broadcast_sd_spill:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33]
-; GENERIC-NEXT: .cfi_def_cfa_offset 32
-; GENERIC-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
-; GENERIC-NEXT: callq func_f64
-; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:1.00]
-; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33]
-; GENERIC-NEXT: .cfi_def_cfa_offset 8
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: broadcast_sd_spill:
-; SKX: # %bb.0:
-; SKX-NEXT: subq $24, %rsp # sched: [1:0.25]
-; SKX-NEXT: .cfi_def_cfa_offset 32
-; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00]
-; SKX-NEXT: callq func_f64
-; SKX-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50]
-; SKX-NEXT: addq $24, %rsp # sched: [1:0.25]
-; SKX-NEXT: .cfi_def_cfa_offset 8
-; SKX-NEXT: retq # sched: [7:1.00]
- %a = fadd double %x, %x
- call void @func_f64(double %a)
- %b = insertelement <8 x double> undef, double %a, i32 0
- %c = shufflevector <8 x double> %b, <8 x double> undef, <8 x i32> zeroinitializer
- ret <8 x double> %c
-}
diff --git a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll
deleted file mode 100755
index 26f4bd578c2..00000000000
--- a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll
+++ /dev/null
@@ -1,15629 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx512f,+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX
-
-; This test is an assembly of avx512 shuffling instructions to check their scheduling
-
-define <16 x i16> @test_16xi16_perm_mask0(<16 x i16> %vec) {
-; GENERIC-LABEL: test_16xi16_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
-; GENERIC-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi16_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
-; SKX-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [6:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
-; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50]
-; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50]
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50]
-; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50]
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-define <16 x i16> @test_16xi16_perm_mask3(<16 x i16> %vec) {
-; GENERIC-LABEL: test_16xi16_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
-; GENERIC-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi16_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
-; SKX-NEXT: vpermw %ymm0, %ymm1, %ymm0 # sched: [6:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
-; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1} # sched: [6:2.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [6:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-define <16 x i16> @test_16xi16_perm_mem_mask0(<16 x i16>* %vp) {
-; GENERIC-LABEL: test_16xi16_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
-; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi16_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
-; SKX-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
-; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
-; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
-; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_16xi16_perm_mem_mask3(<16 x i16>* %vp) {
-; GENERIC-LABEL: test_16xi16_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
-; GENERIC-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi16_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
-; SKX-NEXT: vpermw (%rdi), %ymm0, %ymm0 # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1} # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
-; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-
-define <32 x i16> @test_32xi16_perm_mask0(<32 x i16> %vec) {
-; GENERIC-LABEL: test_32xi16_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50]
-; GENERIC-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_32xi16_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50]
-; SKX-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [6:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10>
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50]
-; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10] sched: [8:0.50]
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [8:0.50]
-; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 8, i32 7, i32 30, i32 11, i32 9, i32 11, i32 30, i32 20, i32 19, i32 22, i32 12, i32 13, i32 20, i32 0, i32 6, i32 10, i32 7, i32 20, i32 12, i32 28, i32 18, i32 13, i32 12, i32 22, i32 13, i32 21, i32 1, i32 14, i32 8, i32 5, i32 16>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16] sched: [8:0.50]
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 8, i32 7, i32 30, i32 11, i32 9, i32 11, i32 30, i32 20, i32 19, i32 22, i32 12, i32 13, i32 20, i32 0, i32 6, i32 10, i32 7, i32 20, i32 12, i32 28, i32 18, i32 13, i32 12, i32 22, i32 13, i32 21, i32 1, i32 14, i32 8, i32 5, i32 16>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [8:0.50]
-; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 15, i32 17, i32 24, i32 28, i32 15, i32 9, i32 14, i32 25, i32 28, i32 25, i32 6, i32 31, i32 20, i32 2, i32 23, i32 31, i32 12, i32 21, i32 10, i32 6, i32 22, i32 0, i32 26, i32 16, i32 3, i32 3, i32 20, i32 27, i32 8, i32 31, i32 3, i32 27>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27] sched: [8:0.50]
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 15, i32 17, i32 24, i32 28, i32 15, i32 9, i32 14, i32 25, i32 28, i32 25, i32 6, i32 31, i32 20, i32 2, i32 23, i32 31, i32 12, i32 21, i32 10, i32 6, i32 22, i32 0, i32 26, i32 16, i32 3, i32 3, i32 20, i32 27, i32 8, i32 31, i32 3, i32 27>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-define <32 x i16> @test_32xi16_perm_mask3(<32 x i16> %vec) {
-; GENERIC-LABEL: test_32xi16_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50]
-; GENERIC-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_32xi16_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50]
-; SKX-NEXT: vpermw %zmm0, %zmm1, %zmm0 # sched: [6:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4>
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50]
-; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1} # sched: [6:2.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4] sched: [8:0.50]
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [6:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-define <32 x i16> @test_32xi16_perm_mem_mask0(<32 x i16>* %vp) {
-; GENERIC-LABEL: test_32xi16_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50]
-; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_32xi16_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50]
-; SKX-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12>
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50]
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12] sched: [8:0.50]
-; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [8:0.50]
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 31, i32 20, i32 2, i32 2, i32 23, i32 1, i32 0, i32 12, i32 16, i32 14, i32 15, i32 18, i32 21, i32 13, i32 11, i32 31, i32 8, i32 24, i32 13, i32 11, i32 2, i32 27, i32 22, i32 28, i32 14, i32 21, i32 3, i32 12, i32 6, i32 1, i32 30, i32 6>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6] sched: [8:0.50]
-; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 31, i32 20, i32 2, i32 2, i32 23, i32 1, i32 0, i32 12, i32 16, i32 14, i32 15, i32 18, i32 21, i32 13, i32 11, i32 31, i32 8, i32 24, i32 13, i32 11, i32 2, i32 27, i32 22, i32 28, i32 14, i32 21, i32 3, i32 12, i32 6, i32 1, i32 30, i32 6>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [8:0.50]
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 4, i32 6, i32 12, i32 17, i32 4, i32 31, i32 31, i32 4, i32 12, i32 21, i32 28, i32 15, i32 29, i32 10, i32 15, i32 15, i32 21, i32 6, i32 19, i32 7, i32 10, i32 30, i32 28, i32 26, i32 1, i32 4, i32 8, i32 25, i32 26, i32 18, i32 22, i32 25>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25] sched: [8:0.50]
-; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 4, i32 6, i32 12, i32 17, i32 4, i32 31, i32 31, i32 4, i32 12, i32 21, i32 28, i32 15, i32 29, i32 10, i32 15, i32 15, i32 21, i32 6, i32 19, i32 7, i32 10, i32 30, i32 28, i32 26, i32 1, i32 4, i32 8, i32 25, i32 26, i32 18, i32 22, i32 25>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_32xi16_perm_mem_mask3(<32 x i16>* %vp) {
-; GENERIC-LABEL: test_32xi16_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50]
-; GENERIC-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_32xi16_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50]
-; SKX-NEXT: vpermw (%rdi), %zmm0, %zmm0 # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27>
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50]
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1} # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27] sched: [8:0.50]
-; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [13:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-
-define <8 x i32> @test_8xi32_perm_mask0(<8 x i32> %vec) {
-; GENERIC-LABEL: test_8xi32_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6] sched: [7:0.50]
-; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6] sched: [7:0.50]
-; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6>
- ret <8 x i32> %res
-}
-define <8 x i32> @test_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_8xi32_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi32_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi32_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi32_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-define <8 x i32> @test_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_8xi32_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi32_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 5, i32 1, i32 2, i32 6, i32 0, i32 0, i32 3>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi32_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi32_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 5, i32 1, i32 2, i32 6, i32 0, i32 0, i32 3>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-define <8 x i32> @test_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_8xi32_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi32_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 6, i32 5, i32 5, i32 1, i32 7, i32 3, i32 4>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi32_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi32_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 6, i32 5, i32 5, i32 1, i32 7, i32 3, i32 4>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-define <8 x i32> @test_8xi32_perm_mask3(<8 x i32> %vec) {
-; GENERIC-LABEL: test_8xi32_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] sched: [7:0.50]
-; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0] sched: [7:0.50]
-; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 4, i32 5, i32 0>
- ret <8 x i32> %res
-}
-define <8 x i32> @test_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_8xi32_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi32_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 4, i32 5, i32 0>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi32_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi32_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 4, i32 5, i32 0>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-define <8 x i32> @test_8xi32_perm_mem_mask0(<8 x i32>* %vp) {
-; GENERIC-LABEL: test_8xi32_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
-; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
-; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5>
- ret <8 x i32> %res
-}
-define <8 x i32> @test_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi32_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi32_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 6, i32 1, i32 7, i32 6, i32 7, i32 6, i32 5>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 6, i32 1, i32 7, i32 6, i32 7, i32 6, i32 5>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi32_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 4, i32 6, i32 1, i32 6, i32 3, i32 6, i32 3>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 4, i32 6, i32 1, i32 6, i32 3, i32 6, i32 3>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_8xi32_perm_mem_mask3(<8 x i32>* %vp) {
-; GENERIC-LABEL: test_8xi32_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
-; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
-; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5>
- ret <8 x i32> %res
-}
-define <8 x i32> @test_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_8xi32_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi32_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi32_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi32_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-
-define <16 x i32> @test_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50]
-; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50]
-; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7>
- ret <16 x i32> %res
-}
-define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_16xi32_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi32_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi32_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi32_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_16xi32_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi32_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 10, i32 0, i32 14, i32 15, i32 11, i32 1, i32 1, i32 5, i32 0, i32 5, i32 0, i32 15, i32 13, i32 1, i32 14, i32 3>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi32_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi32_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 10, i32 0, i32 14, i32 15, i32 11, i32 1, i32 1, i32 5, i32 0, i32 5, i32 0, i32 15, i32 13, i32 1, i32 14, i32 3>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_16xi32_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi32_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 10, i32 15, i32 1, i32 0, i32 5, i32 0, i32 9, i32 13, i32 2, i32 1, i32 5, i32 15, i32 2, i32 15, i32 5>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi32_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi32_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 10, i32 15, i32 1, i32 0, i32 5, i32 0, i32 9, i32 13, i32 2, i32 1, i32 5, i32 15, i32 2, i32 15, i32 5>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-define <16 x i32> @test_16xi32_perm_mask3(<16 x i32> %vec) {
-; GENERIC-LABEL: test_16xi32_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50]
-; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50]
-; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12>
- ret <16 x i32> %res
-}
-define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_16xi32_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi32_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi32_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi32_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-define <16 x i32> @test_16xi32_perm_mem_mask0(<16 x i32>* %vp) {
-; GENERIC-LABEL: test_16xi32_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50]
-; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50]
-; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6>
- ret <16 x i32> %res
-}
-define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi32_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi32_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 5, i32 3, i32 4, i32 7, i32 15, i32 12, i32 4, i32 8, i32 11, i32 12, i32 7, i32 6, i32 12, i32 6, i32 3>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 5, i32 3, i32 4, i32 7, i32 15, i32 12, i32 4, i32 8, i32 11, i32 12, i32 7, i32 6, i32 12, i32 6, i32 3>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi32_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 14, i32 2, i32 7, i32 10, i32 7, i32 3, i32 0, i32 11, i32 9, i32 0, i32 4, i32 12, i32 10, i32 8, i32 2>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 14, i32 2, i32 7, i32 10, i32 7, i32 3, i32 0, i32 11, i32 9, i32 0, i32 4, i32 12, i32 10, i32 8, i32 2>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_16xi32_perm_mem_mask3(<16 x i32>* %vp) {
-; GENERIC-LABEL: test_16xi32_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50]
-; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50]
-; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1>
- ret <16 x i32> %res
-}
-define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_16xi32_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi32_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi32_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi32_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-
-define <4 x i64> @test_4xi64_perm_mask0(<4 x i64> %vec) {
-; GENERIC-LABEL: test_4xi64_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,3,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,3,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
- ret <4 x i64> %res
-}
-define <4 x i64> @test_masked_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_4xi64_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xi64_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1] sched: [3:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_masked_z_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_4xi64_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xi64_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
- ret <4 x i64> %res
-}
-define <4 x i64> @test_masked_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_4xi64_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xi64_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3] sched: [3:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_masked_z_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_4xi64_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xi64_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
- ret <4 x i64> %res
-}
-define <4 x i64> @test_masked_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_4xi64_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xi64_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1] sched: [3:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 1>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_masked_z_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_4xi64_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xi64_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 1>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
- ret <4 x i64> %res
-}
-define <4 x i64> @test_4xi64_perm_mask3(<4 x i64> %vec) {
-; GENERIC-LABEL: test_4xi64_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3>
- ret <4 x i64> %res
-}
-define <4 x i64> @test_masked_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_4xi64_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xi64_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3] sched: [3:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_masked_z_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_4xi64_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xi64_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
- ret <4 x i64> %res
-}
-define <4 x i64> @test_4xi64_perm_mem_mask0(<4 x i64>* %vp) {
-; GENERIC-LABEL: test_4xi64_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i64>, <4 x i64>* %vp
- %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0>
- ret <4 x i64> %res
-}
-define <4 x i64> @test_masked_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xi64_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i64>, <4 x i64>* %vp
- %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_masked_z_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i64>, <4 x i64>* %vp
- %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_masked_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xi64_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i64>, <4 x i64>* %vp
- %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_masked_z_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i64>, <4 x i64>* %vp
- %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_masked_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xi64_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i64>, <4 x i64>* %vp
- %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_masked_z_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i64>, <4 x i64>* %vp
- %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_4xi64_perm_mem_mask3(<4 x i64>* %vp) {
-; GENERIC-LABEL: test_4xi64_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i64>, <4 x i64>* %vp
- %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
- ret <4 x i64> %res
-}
-define <4 x i64> @test_masked_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_4xi64_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xi64_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i64>, <4 x i64>* %vp
- %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_masked_z_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_4xi64_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xi64_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i64>, <4 x i64>* %vp
- %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
- ret <4 x i64> %res
-}
-
-define <8 x i64> @test_8xi64_perm_mask0(<8 x i64> %vec) {
-; GENERIC-LABEL: test_8xi64_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [0,4,7,6,5,5,1,6] sched: [7:0.50]
-; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [0,4,7,6,5,5,1,6] sched: [8:0.50]
-; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 4, i32 7, i32 6, i32 5, i32 5, i32 1, i32 6>
- ret <8 x i64> %res
-}
-define <8 x i64> @test_masked_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xi64_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi64_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 4, i32 7, i32 6, i32 5, i32 5, i32 1, i32 6>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_z_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi64_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi64_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 4, i32 7, i32 6, i32 5, i32 5, i32 1, i32 6>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-define <8 x i64> @test_masked_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi64_perm_imm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5] sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 0, i32 1, i32 1, i32 5, i32 4, i32 5, i32 5>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_z_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi64_perm_imm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 0, i32 1, i32 1, i32 5, i32 4, i32 5, i32 5>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-define <8 x i64> @test_masked_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xi64_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi64_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 3, i32 3, i32 5, i32 4, i32 1>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_z_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi64_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi64_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 3, i32 3, i32 5, i32 4, i32 1>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-define <8 x i64> @test_8xi64_perm_imm_mask3(<8 x i64> %vec) {
-; GENERIC-LABEL: test_8xi64_perm_imm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_perm_imm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,1,3,1,7,5,7,5] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 1, i32 7, i32 5, i32 7, i32 5>
- ret <8 x i64> %res
-}
-define <8 x i64> @test_masked_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi64_perm_imm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5] sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 1, i32 7, i32 5, i32 7, i32 5>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_z_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi64_perm_imm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 1, i32 7, i32 5, i32 7, i32 5>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-define <8 x i64> @test_masked_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xi64_perm_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi64_perm_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 6, i32 3, i32 1, i32 1, i32 7, i32 4, i32 0, i32 3>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_z_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi64_perm_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi64_perm_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 6, i32 3, i32 1, i32 1, i32 7, i32 4, i32 0, i32 3>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-define <8 x i64> @test_masked_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi64_perm_imm_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4] sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_z_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi64_perm_imm_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-define <8 x i64> @test_8xi64_perm_mask6(<8 x i64> %vec) {
-; GENERIC-LABEL: test_8xi64_perm_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [5,1,4,4,5,4,2,7] sched: [7:0.50]
-; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_perm_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [5,1,4,4,5,4,2,7] sched: [8:0.50]
-; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 4, i32 4, i32 5, i32 4, i32 2, i32 7>
- ret <8 x i64> %res
-}
-define <8 x i64> @test_masked_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xi64_perm_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi64_perm_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 4, i32 4, i32 5, i32 4, i32 2, i32 7>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_z_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi64_perm_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi64_perm_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 4, i32 4, i32 5, i32 4, i32 2, i32 7>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-define <8 x i64> @test_masked_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xi64_perm_imm_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi64_perm_imm_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7] sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_z_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi64_perm_imm_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-define <8 x i64> @test_8xi64_perm_mem_mask0(<8 x i64>* %vp) {
-; GENERIC-LABEL: test_8xi64_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] sched: [7:0.50]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3] sched: [8:0.50]
-; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 6, i32 5, i32 7, i32 3, i32 7, i32 3>
- ret <8 x i64> %res
-}
-define <8 x i64> @test_masked_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi64_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 6, i32 5, i32 7, i32 3, i32 7, i32 3>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_z_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 6, i32 5, i32 7, i32 3, i32 7, i32 3>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 0, i32 5, i32 5, i32 5, i32 4>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 0, i32 5, i32 5, i32 5, i32 4>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi64_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 2, i32 1, i32 4, i32 1, i32 1, i32 5, i32 5>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_z_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 2, i32 1, i32 4, i32 1, i32 1, i32 5, i32 5>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp) {
-; GENERIC-LABEL: test_8xi64_perm_imm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_perm_imm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 1, i32 5, i32 7, i32 5, i32 5>
- ret <8 x i64> %res
-}
-define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 1, i32 5, i32 7, i32 5, i32 5>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 1, i32 5, i32 7, i32 5, i32 5>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi64_perm_mem_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 0, i32 7, i32 0, i32 3, i32 5, i32 0, i32 6>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_z_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 0, i32 7, i32 0, i32 3, i32 5, i32 0, i32 6>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 0, i32 0, i32 7, i32 5, i32 4, i32 4>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 0, i32 0, i32 7, i32 5, i32 4, i32 4>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_8xi64_perm_mem_mask6(<8 x i64>* %vp) {
-; GENERIC-LABEL: test_8xi64_perm_mem_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] sched: [7:0.50]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_perm_mem_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6] sched: [8:0.50]
-; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 6, i32 3, i32 7, i32 3, i32 0, i32 3, i32 6>
- ret <8 x i64> %res
-}
-define <8 x i64> @test_masked_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xi64_perm_mem_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi64_perm_mem_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 6, i32 3, i32 7, i32 3, i32 0, i32 3, i32 6>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_z_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi64_perm_mem_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi64_perm_mem_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 6, i32 3, i32 7, i32 3, i32 0, i32 3, i32 6>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xi64_perm_imm_mem_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi64_perm_imm_mem_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 1, i32 7, i32 4, i32 4, i32 5>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i64>, <8 x i64>* %vp
- %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 1, i32 7, i32 4, i32 4, i32 5>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-
-define <8 x float> @test_8xfloat_perm_mask0(<8 x float> %vec) {
-; GENERIC-LABEL: test_8xfloat_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [3,4,2,4,1,2,3,4] sched: [7:0.50]
-; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [3,4,2,4,1,2,3,4] sched: [7:0.50]
-; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 4, i32 2, i32 4, i32 1, i32 2, i32 3, i32 4>
- ret <8 x float> %res
-}
-define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_8xfloat_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xfloat_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 4, i32 2, i32 4, i32 1, i32 2, i32 3, i32 4>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
- ret <8 x float> %res
-}
-
-define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xfloat_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 4, i32 2, i32 4, i32 1, i32 2, i32 3, i32 4>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-define <8 x float> @test_masked_8xfloat_perm_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_8xfloat_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xfloat_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 2, i32 1, i32 0, i32 6, i32 0, i32 5, i32 1>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
- ret <8 x float> %res
-}
-
-define <8 x float> @test_masked_z_8xfloat_perm_mask1(<8 x float> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xfloat_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1] sched: [7:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 2, i32 1, i32 0, i32 6, i32 0, i32 5, i32 1>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_8xfloat_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xfloat_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 5, i32 5, i32 5, i32 4, i32 6, i32 0, i32 5>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
- ret <8 x float> %res
-}
-
-define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xfloat_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 5, i32 5, i32 5, i32 4, i32 6, i32 0, i32 5>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_perm_mask3(<8 x float> %vec) {
-; GENERIC-LABEL: test_8xfloat_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [0,5,2,5,5,5,1,6] sched: [7:0.50]
-; GENERIC-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [0,5,2,5,5,5,1,6] sched: [7:0.50]
-; SKX-NEXT: vpermps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 5, i32 2, i32 5, i32 5, i32 5, i32 1, i32 6>
- ret <8 x float> %res
-}
-define <8 x float> @test_masked_8xfloat_perm_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_8xfloat_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xfloat_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 5, i32 2, i32 5, i32 5, i32 5, i32 1, i32 6>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
- ret <8 x float> %res
-}
-
-define <8 x float> @test_masked_z_8xfloat_perm_mask3(<8 x float> %vec, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_8xfloat_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xfloat_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 5, i32 2, i32 5, i32 5, i32 5, i32 1, i32 6>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_perm_mem_mask0(<8 x float>* %vp) {
-; GENERIC-LABEL: test_8xfloat_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] sched: [7:0.50]
-; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0] sched: [7:0.50]
-; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x float>, <8 x float>* %vp
- %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 2, i32 1, i32 6, i32 4, i32 2, i32 4, i32 0>
- ret <8 x float> %res
-}
-define <8 x float> @test_masked_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xfloat_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x float>, <8 x float>* %vp
- %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 2, i32 1, i32 6, i32 4, i32 2, i32 4, i32 0>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
- ret <8 x float> %res
-}
-
-define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x float>, <8 x float>* %vp
- %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 2, i32 1, i32 6, i32 4, i32 2, i32 4, i32 0>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-
-define <8 x float> @test_masked_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xfloat_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x float>, <8 x float>* %vp
- %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 4, i32 0, i32 6, i32 6, i32 6>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
- ret <8 x float> %res
-}
-
-define <8 x float> @test_masked_z_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x float>, <8 x float>* %vp
- %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 4, i32 0, i32 6, i32 6, i32 6>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-
-define <8 x float> @test_masked_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xfloat_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x float>, <8 x float>* %vp
- %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 1, i32 5, i32 6, i32 6, i32 2, i32 4>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
- ret <8 x float> %res
-}
-
-define <8 x float> @test_masked_z_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x float>, <8 x float>* %vp
- %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 1, i32 5, i32 6, i32 6, i32 2, i32 4>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] sched: [7:0.50]
-; GENERIC-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0] sched: [7:0.50]
-; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x float>, <8 x float>* %vp
- %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 7, i32 0, i32 6, i32 4, i32 2, i32 3, i32 0>
- ret <8 x float> %res
-}
-define <8 x float> @test_masked_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_8xfloat_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xfloat_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x float>, <8 x float>* %vp
- %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 7, i32 0, i32 6, i32 4, i32 2, i32 3, i32 0>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
- ret <8 x float> %res
-}
-
-define <8 x float> @test_masked_z_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_8xfloat_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xfloat_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0] sched: [7:0.50]
-; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x float>, <8 x float>* %vp
- %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 7, i32 0, i32 6, i32 4, i32 2, i32 3, i32 0>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-
-define <16 x float> @test_16xfloat_perm_mask0(<16 x float> %vec) {
-; GENERIC-LABEL: test_16xfloat_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [7:0.50]
-; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [8:0.50]
-; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 7, i32 5, i32 13, i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7>
- ret <16 x float> %res
-}
-define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_16xfloat_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xfloat_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 7, i32 5, i32 13, i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
- ret <16 x float> %res
-}
-
-define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xfloat_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 7, i32 5, i32 13, i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-define <16 x float> @test_masked_16xfloat_perm_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_16xfloat_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xfloat_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 11, i32 10, i32 4, i32 10, i32 4, i32 5, i32 8, i32 11, i32 2, i32 0, i32 10, i32 0, i32 0, i32 3, i32 10, i32 1>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
- ret <16 x float> %res
-}
-
-define <16 x float> @test_masked_z_16xfloat_perm_mask1(<16 x float> %vec, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xfloat_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 11, i32 10, i32 4, i32 10, i32 4, i32 5, i32 8, i32 11, i32 2, i32 0, i32 10, i32 0, i32 0, i32 3, i32 10, i32 1>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_16xfloat_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xfloat_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 15, i32 6, i32 14, i32 3, i32 6, i32 5, i32 2, i32 5, i32 15, i32 11, i32 6, i32 6, i32 4, i32 8, i32 11>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
- ret <16 x float> %res
-}
-
-define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xfloat_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 15, i32 6, i32 14, i32 3, i32 6, i32 5, i32 2, i32 5, i32 15, i32 11, i32 6, i32 6, i32 4, i32 8, i32 11>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_perm_mask3(<16 x float> %vec) {
-; GENERIC-LABEL: test_16xfloat_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [7:0.50]
-; GENERIC-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [8:0.50]
-; SKX-NEXT: vpermps %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 0, i32 14, i32 6, i32 6, i32 0, i32 2, i32 13, i32 8, i32 11, i32 2, i32 5, i32 13, i32 13, i32 3>
- ret <16 x float> %res
-}
-define <16 x float> @test_masked_16xfloat_perm_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_16xfloat_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xfloat_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 0, i32 14, i32 6, i32 6, i32 0, i32 2, i32 13, i32 8, i32 11, i32 2, i32 5, i32 13, i32 13, i32 3>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
- ret <16 x float> %res
-}
-
-define <16 x float> @test_masked_z_16xfloat_perm_mask3(<16 x float> %vec, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_16xfloat_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xfloat_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 0, i32 14, i32 6, i32 6, i32 0, i32 2, i32 13, i32 8, i32 11, i32 2, i32 5, i32 13, i32 13, i32 3>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_perm_mem_mask0(<16 x float>* %vp) {
-; GENERIC-LABEL: test_16xfloat_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [7:0.50]
-; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [8:0.50]
-; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x float>, <16 x float>* %vp
- %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 2, i32 1, i32 14, i32 9, i32 9, i32 7, i32 2, i32 9, i32 4, i32 12, i32 11, i32 0, i32 14, i32 0, i32 1>
- ret <16 x float> %res
-}
-define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xfloat_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x float>, <16 x float>* %vp
- %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 2, i32 1, i32 14, i32 9, i32 9, i32 7, i32 2, i32 9, i32 4, i32 12, i32 11, i32 0, i32 14, i32 0, i32 1>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
- ret <16 x float> %res
-}
-
-define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x float>, <16 x float>* %vp
- %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 2, i32 1, i32 14, i32 9, i32 9, i32 7, i32 2, i32 9, i32 4, i32 12, i32 11, i32 0, i32 14, i32 0, i32 1>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-
-define <16 x float> @test_masked_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xfloat_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x float>, <16 x float>* %vp
- %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 4, i32 2, i32 3, i32 5, i32 11, i32 6, i32 4, i32 7, i32 6, i32 4, i32 14, i32 8, i32 15, i32 12, i32 9, i32 4>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
- ret <16 x float> %res
-}
-
-define <16 x float> @test_masked_z_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x float>, <16 x float>* %vp
- %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 4, i32 2, i32 3, i32 5, i32 11, i32 6, i32 4, i32 7, i32 6, i32 4, i32 14, i32 8, i32 15, i32 12, i32 9, i32 4>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-
-define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xfloat_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x float>, <16 x float>* %vp
- %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 11, i32 6, i32 7, i32 0, i32 11, i32 0, i32 10, i32 9, i32 12, i32 4, i32 10, i32 3, i32 8, i32 5>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
- ret <16 x float> %res
-}
-
-define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x float>, <16 x float>* %vp
- %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 11, i32 6, i32 7, i32 0, i32 11, i32 0, i32 10, i32 9, i32 12, i32 4, i32 10, i32 3, i32 8, i32 5>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_perm_mem_mask3(<16 x float>* %vp) {
-; GENERIC-LABEL: test_16xfloat_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [7:0.50]
-; GENERIC-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [8:0.50]
-; SKX-NEXT: vpermps (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x float>, <16 x float>* %vp
- %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 15, i32 3, i32 9, i32 5, i32 15, i32 14, i32 9, i32 11, i32 10, i32 5, i32 14, i32 14, i32 5, i32 11, i32 0>
- ret <16 x float> %res
-}
-define <16 x float> @test_masked_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_16xfloat_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xfloat_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x float>, <16 x float>* %vp
- %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 15, i32 3, i32 9, i32 5, i32 15, i32 14, i32 9, i32 11, i32 10, i32 5, i32 14, i32 14, i32 5, i32 11, i32 0>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
- ret <16 x float> %res
-}
-
-define <16 x float> @test_masked_z_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_16xfloat_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xfloat_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0] sched: [8:0.50]
-; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x float>, <16 x float>* %vp
- %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 15, i32 3, i32 9, i32 5, i32 15, i32 14, i32 9, i32 11, i32 10, i32 5, i32 14, i32 14, i32 5, i32 11, i32 0>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-
-define <4 x double> @test_4xdouble_perm_mask0(<4 x double> %vec) {
-; GENERIC-LABEL: test_4xdouble_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,2] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,2] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2>
- ret <4 x double> %res
-}
-define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_4xdouble_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xdouble_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2] sched: [3:1.00]
-; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
- ret <4 x double> %res
-}
-
-define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xdouble_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_4xdouble_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xdouble_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0] sched: [3:1.00]
-; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
- ret <4 x double> %res
-}
-
-define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xdouble_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_4xdouble_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xdouble_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1] sched: [3:1.00]
-; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 3, i32 3, i32 1>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
- ret <4 x double> %res
-}
-
-define <4 x double> @test_masked_z_4xdouble_perm_mask2(<4 x double> %vec, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xdouble_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 3, i32 3, i32 1>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_perm_mask3(<4 x double> %vec) {
-; GENERIC-LABEL: test_4xdouble_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,2] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,2] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 2>
- ret <4 x double> %res
-}
-define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_4xdouble_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xdouble_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2] sched: [3:1.00]
-; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 2>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
- ret <4 x double> %res
-}
-
-define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_4xdouble_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xdouble_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 2>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_perm_mem_mask0(<4 x double>* %vp) {
-; GENERIC-LABEL: test_4xdouble_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x double>, <4 x double>* %vp
- %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
- ret <4 x double> %res
-}
-define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xdouble_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x double>, <4 x double>* %vp
- %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
- ret <4 x double> %res
-}
-
-define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x double>, <4 x double>* %vp
- %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-
-define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xdouble_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x double>, <4 x double>* %vp
- %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 2>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
- ret <4 x double> %res
-}
-
-define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x double>, <4 x double>* %vp
- %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 2>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-
-define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xdouble_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x double>, <4 x double>* %vp
- %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
- ret <4 x double> %res
-}
-
-define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x double>, <4 x double>* %vp
- %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_perm_mem_mask3(<4 x double>* %vp) {
-; GENERIC-LABEL: test_4xdouble_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x double>, <4 x double>* %vp
- %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
- ret <4 x double> %res
-}
-define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_4xdouble_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xdouble_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x double>, <4 x double>* %vp
- %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
- ret <4 x double> %res
-}
-
-define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_4xdouble_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xdouble_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x double>, <4 x double>* %vp
- %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-
-define <8 x double> @test_8xdouble_perm_mask0(<8 x double> %vec) {
-; GENERIC-LABEL: test_8xdouble_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [5,7,4,2,7,4,3,4] sched: [7:0.50]
-; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [5,7,4,2,7,4,3,4] sched: [8:0.50]
-; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 5, i32 7, i32 4, i32 2, i32 7, i32 4, i32 3, i32 4>
- ret <8 x double> %res
-}
-define <8 x double> @test_masked_8xdouble_perm_mask0(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xdouble_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xdouble_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 5, i32 7, i32 4, i32 2, i32 7, i32 4, i32 3, i32 4>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xdouble_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 5, i32 7, i32 4, i32 2, i32 7, i32 4, i32 3, i32 4>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_masked_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xdouble_perm_imm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6] sched: [3:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 2, i32 7, i32 4, i32 4, i32 6>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_z_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 2, i32 7, i32 4, i32 4, i32 6>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_masked_8xdouble_perm_mask2(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xdouble_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xdouble_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 7, i32 5, i32 5, i32 5, i32 3, i32 5, i32 1, i32 7>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_z_8xdouble_perm_mask2(<8 x double> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xdouble_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 7, i32 5, i32 5, i32 5, i32 3, i32 5, i32 1, i32 7>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_perm_imm_mask3(<8 x double> %vec) {
-; GENERIC-LABEL: test_8xdouble_perm_imm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_perm_imm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,3,0,5,7,7,4] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4>
- ret <8 x double> %res
-}
-define <8 x double> @test_masked_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xdouble_perm_imm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4] sched: [3:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_z_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_masked_8xdouble_perm_mask4(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xdouble_perm_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xdouble_perm_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 5, i32 3, i32 4, i32 6, i32 5, i32 7, i32 1>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_z_8xdouble_perm_mask4(<8 x double> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xdouble_perm_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 5, i32 3, i32 4, i32 6, i32 5, i32 7, i32 1>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_masked_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xdouble_perm_imm_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7] sched: [3:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_z_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_perm_mask6(<8 x double> %vec) {
-; GENERIC-LABEL: test_8xdouble_perm_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm1 = [2,7,6,4,0,0,0,2] sched: [7:0.50]
-; GENERIC-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_perm_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm1 = [2,7,6,4,0,0,0,2] sched: [8:0.50]
-; SKX-NEXT: vpermpd %zmm0, %zmm1, %zmm0 # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 7, i32 6, i32 4, i32 0, i32 0, i32 0, i32 2>
- ret <8 x double> %res
-}
-define <8 x double> @test_masked_8xdouble_perm_mask6(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xdouble_perm_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xdouble_perm_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1} # sched: [3:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 7, i32 6, i32 4, i32 0, i32 0, i32 0, i32 2>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_z_8xdouble_perm_mask6(<8 x double> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xdouble_perm_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xdouble_perm_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z} # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 7, i32 6, i32 4, i32 0, i32 0, i32 0, i32 2>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_masked_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xdouble_perm_imm_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6] sched: [3:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 2, i32 7, i32 5, i32 7, i32 6>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_z_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 2, i32 7, i32 5, i32 7, i32 6>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_perm_mem_mask0(<8 x double>* %vp) {
-; GENERIC-LABEL: test_8xdouble_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] sched: [7:0.50]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1] sched: [8:0.50]
-; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 0, i32 4, i32 2, i32 0, i32 1>
- ret <8 x double> %res
-}
-define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xdouble_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 0, i32 4, i32 2, i32 0, i32 1>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 0, i32 4, i32 2, i32 0, i32 1>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 2, i32 0, i32 3, i32 4, i32 6, i32 4, i32 7>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 2, i32 0, i32 3, i32 4, i32 6, i32 4, i32 7>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xdouble_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 6, i32 7, i32 2, i32 7, i32 7, i32 6, i32 2, i32 5>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 6, i32 7, i32 2, i32 7, i32 7, i32 6, i32 2, i32 5>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp) {
-; GENERIC-LABEL: test_8xdouble_perm_imm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_perm_imm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 0, i32 6, i32 5, i32 5, i32 4>
- ret <8 x double> %res
-}
-define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 0, i32 6, i32 5, i32 5, i32 4>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 0, i32 6, i32 5, i32 5, i32 4>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xdouble_perm_mem_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 5, i32 6, i32 0, i32 6, i32 0>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_z_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 5, i32 6, i32 0, i32 6, i32 0>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 3, i32 6, i32 6, i32 6, i32 7>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 3, i32 6, i32 6, i32 6, i32 7>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_perm_mem_mask6(<8 x double>* %vp) {
-; GENERIC-LABEL: test_8xdouble_perm_mem_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] sched: [7:0.50]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_perm_mem_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5] sched: [8:0.50]
-; SKX-NEXT: vpermpd (%rdi), %zmm0, %zmm0 # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 4, i32 0, i32 4, i32 6, i32 1, i32 2, i32 5>
- ret <8 x double> %res
-}
-define <8 x double> @test_masked_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xdouble_perm_mem_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xdouble_perm_mem_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 4, i32 0, i32 4, i32 6, i32 1, i32 2, i32 5>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_z_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xdouble_perm_mem_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [7:0.50]
-; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xdouble_perm_mem_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5] sched: [8:0.50]
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z} # sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 4, i32 0, i32 4, i32 6, i32 1, i32 2, i32 5>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_8xdouble_perm_imm_mem_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xdouble_perm_imm_mem_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
- ret <8 x double> %res
-}
-
-define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x i64> %mask) {
-; GENERIC-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x double>, <8 x double>* %vp
- %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <16 x i8> @test_16xi8_perm_mask0(<16 x i8> %vec) {
-; GENERIC-LABEL: test_16xi8_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi8_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
- ret <16 x i8> %res
-}
-define <16 x i8> @test_masked_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
-; GENERIC-LABEL: test_masked_16xi8_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi8_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:1.00]
-; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
- %cmp = icmp eq <16 x i8> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
- ret <16 x i8> %res
-}
-
-define <16 x i8> @test_masked_z_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi8_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi8_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
- %cmp = icmp eq <16 x i8> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
- ret <16 x i8> %res
-}
-define <16 x i8> @test_masked_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
-; GENERIC-LABEL: test_masked_16xi8_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi8_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:1.00]
-; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0>
- %cmp = icmp eq <16 x i8> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
- ret <16 x i8> %res
-}
-
-define <16 x i8> @test_masked_z_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi8_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi8_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0>
- %cmp = icmp eq <16 x i8> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
- ret <16 x i8> %res
-}
-define <16 x i8> @test_masked_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
-; GENERIC-LABEL: test_masked_16xi8_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi8_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:1.00]
-; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7>
- %cmp = icmp eq <16 x i8> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
- ret <16 x i8> %res
-}
-
-define <16 x i8> @test_masked_z_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi8_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi8_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7>
- %cmp = icmp eq <16 x i8> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
- ret <16 x i8> %res
-}
-define <16 x i8> @test_16xi8_perm_mask3(<16 x i8> %vec) {
-; GENERIC-LABEL: test_16xi8_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi8_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
- ret <16 x i8> %res
-}
-define <16 x i8> @test_masked_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
-; GENERIC-LABEL: test_masked_16xi8_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi8_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:1.00]
-; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
- %cmp = icmp eq <16 x i8> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
- ret <16 x i8> %res
-}
-
-define <16 x i8> @test_masked_z_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi8_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi8_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
- %cmp = icmp eq <16 x i8> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
- ret <16 x i8> %res
-}
-define <16 x i8> @test_16xi8_perm_mem_mask0(<16 x i8>* %vp) {
-; GENERIC-LABEL: test_16xi8_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi8_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i8>, <16 x i8>* %vp
- %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
- ret <16 x i8> %res
-}
-define <16 x i8> @test_masked_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) {
-; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
-; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi8_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
-; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i8>, <16 x i8>* %vp
- %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
- %cmp = icmp eq <16 x i8> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
- ret <16 x i8> %res
-}
-
-define <16 x i8> @test_masked_z_16xi8_perm_mem_mask0(<16 x i8>* %vp, <16 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
-; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
-; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i8>, <16 x i8>* %vp
- %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
- %cmp = icmp eq <16 x i8> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
- ret <16 x i8> %res
-}
-
-define <16 x i8> @test_masked_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) {
-; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
-; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi8_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
-; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i8>, <16 x i8>* %vp
- %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11>
- %cmp = icmp eq <16 x i8> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
- ret <16 x i8> %res
-}
-
-define <16 x i8> @test_masked_z_16xi8_perm_mem_mask1(<16 x i8>* %vp, <16 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
-; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
-; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i8>, <16 x i8>* %vp
- %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11>
- %cmp = icmp eq <16 x i8> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
- ret <16 x i8> %res
-}
-
-define <16 x i8> @test_masked_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) {
-; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
-; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi8_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
-; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i8>, <16 x i8>* %vp
- %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9>
- %cmp = icmp eq <16 x i8> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
- ret <16 x i8> %res
-}
-
-define <16 x i8> @test_masked_z_16xi8_perm_mem_mask2(<16 x i8>* %vp, <16 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
-; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
-; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i8>, <16 x i8>* %vp
- %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9>
- %cmp = icmp eq <16 x i8> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
- ret <16 x i8> %res
-}
-
-define <16 x i8> @test_16xi8_perm_mem_mask3(<16 x i8>* %vp) {
-; GENERIC-LABEL: test_16xi8_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi8_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i8>, <16 x i8>* %vp
- %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
- ret <16 x i8> %res
-}
-define <16 x i8> @test_masked_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %vec2, <16 x i8> %mask) {
-; GENERIC-LABEL: test_masked_16xi8_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
-; GENERIC-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi8_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %xmm2 # sched: [6:0.50]
-; SKX-NEXT: vptestnmb %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i8>, <16 x i8>* %vp
- %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
- %cmp = icmp eq <16 x i8> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
- ret <16 x i8> %res
-}
-
-define <16 x i8> @test_masked_z_16xi8_perm_mem_mask3(<16 x i8>* %vp, <16 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi8_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
-; GENERIC-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi8_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %xmm1 # sched: [6:0.50]
-; SKX-NEXT: vptestnmb %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i8>, <16 x i8>* %vp
- %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
- %cmp = icmp eq <16 x i8> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
- ret <16 x i8> %res
-}
-
-define <32 x i8> @test_32xi8_perm_mask0(<32 x i8> %vec) {
-; GENERIC-LABEL: test_32xi8_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_32xi8_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21>
- ret <32 x i8> %res
-}
-define <32 x i8> @test_masked_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) {
-; GENERIC-LABEL: test_masked_32xi8_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:0.50]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi8_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21>
- %cmp = icmp eq <32 x i8> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
- ret <32 x i8> %res
-}
-
-define <32 x i8> @test_masked_z_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi8_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi8_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21>
- %cmp = icmp eq <32 x i8> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
- ret <32 x i8> %res
-}
-define <32 x i8> @test_masked_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) {
-; GENERIC-LABEL: test_masked_32xi8_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:0.50]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi8_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 0, i32 4, i32 3, i32 15, i32 5, i32 4, i32 5, i32 15, i32 10, i32 9, i32 11, i32 6, i32 6, i32 10, i32 0, i32 3, i32 21, i32 19, i32 26, i32 22, i32 30, i32 25, i32 22, i32 22, i32 27, i32 22, i32 26, i32 16, i32 23, i32 20, i32 18, i32 24>
- %cmp = icmp eq <32 x i8> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
- ret <32 x i8> %res
-}
-
-define <32 x i8> @test_masked_z_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi8_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi8_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 0, i32 4, i32 3, i32 15, i32 5, i32 4, i32 5, i32 15, i32 10, i32 9, i32 11, i32 6, i32 6, i32 10, i32 0, i32 3, i32 21, i32 19, i32 26, i32 22, i32 30, i32 25, i32 22, i32 22, i32 27, i32 22, i32 26, i32 16, i32 23, i32 20, i32 18, i32 24>
- %cmp = icmp eq <32 x i8> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
- ret <32 x i8> %res
-}
-define <32 x i8> @test_masked_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) {
-; GENERIC-LABEL: test_masked_32xi8_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:0.50]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi8_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 7, i32 8, i32 12, i32 14, i32 7, i32 4, i32 7, i32 12, i32 14, i32 12, i32 3, i32 15, i32 10, i32 1, i32 11, i32 15, i32 22, i32 26, i32 21, i32 19, i32 27, i32 16, i32 29, i32 24, i32 17, i32 17, i32 26, i32 29, i32 20, i32 31, i32 17, i32 29>
- %cmp = icmp eq <32 x i8> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
- ret <32 x i8> %res
-}
-
-define <32 x i8> @test_masked_z_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi8_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi8_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 7, i32 8, i32 12, i32 14, i32 7, i32 4, i32 7, i32 12, i32 14, i32 12, i32 3, i32 15, i32 10, i32 1, i32 11, i32 15, i32 22, i32 26, i32 21, i32 19, i32 27, i32 16, i32 29, i32 24, i32 17, i32 17, i32 26, i32 29, i32 20, i32 31, i32 17, i32 29>
- %cmp = icmp eq <32 x i8> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
- ret <32 x i8> %res
-}
-define <32 x i8> @test_32xi8_perm_mask3(<32 x i8> %vec) {
-; GENERIC-LABEL: test_32xi8_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_32xi8_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18>
- ret <32 x i8> %res
-}
-define <32 x i8> @test_masked_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) {
-; GENERIC-LABEL: test_masked_32xi8_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:0.50]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi8_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18>
- %cmp = icmp eq <32 x i8> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
- ret <32 x i8> %res
-}
-
-define <32 x i8> @test_masked_z_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi8_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi8_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18>
- %cmp = icmp eq <32 x i8> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
- ret <32 x i8> %res
-}
-define <32 x i8> @test_32xi8_perm_mem_mask0(<32 x i8>* %vp) {
-; GENERIC-LABEL: test_32xi8_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_32xi8_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50]
-; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i8>, <32 x i8>* %vp
- %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22>
- ret <32 x i8> %res
-}
-define <32 x i8> @test_masked_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) {
-; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
-; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi8_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
-; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i8>, <32 x i8>* %vp
- %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22>
- %cmp = icmp eq <32 x i8> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
- ret <32 x i8> %res
-}
-
-define <32 x i8> @test_masked_z_32xi8_perm_mem_mask0(<32 x i8>* %vp, <32 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
-; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
-; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i8>, <32 x i8>* %vp
- %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22>
- %cmp = icmp eq <32 x i8> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
- ret <32 x i8> %res
-}
-
-define <32 x i8> @test_masked_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) {
-; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
-; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi8_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
-; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i8>, <32 x i8>* %vp
- %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 15, i32 10, i32 1, i32 1, i32 11, i32 0, i32 0, i32 6, i32 8, i32 7, i32 7, i32 9, i32 10, i32 6, i32 5, i32 15, i32 20, i32 28, i32 22, i32 21, i32 17, i32 29, i32 27, i32 30, i32 23, i32 26, i32 17, i32 22, i32 19, i32 16, i32 31, i32 19>
- %cmp = icmp eq <32 x i8> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
- ret <32 x i8> %res
-}
-
-define <32 x i8> @test_masked_z_32xi8_perm_mem_mask1(<32 x i8>* %vp, <32 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
-; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
-; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i8>, <32 x i8>* %vp
- %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 15, i32 10, i32 1, i32 1, i32 11, i32 0, i32 0, i32 6, i32 8, i32 7, i32 7, i32 9, i32 10, i32 6, i32 5, i32 15, i32 20, i32 28, i32 22, i32 21, i32 17, i32 29, i32 27, i32 30, i32 23, i32 26, i32 17, i32 22, i32 19, i32 16, i32 31, i32 19>
- %cmp = icmp eq <32 x i8> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
- ret <32 x i8> %res
-}
-
-define <32 x i8> @test_masked_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) {
-; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
-; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi8_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
-; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i8>, <32 x i8>* %vp
- %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 6, i32 8, i32 2, i32 15, i32 15, i32 2, i32 6, i32 10, i32 14, i32 7, i32 14, i32 5, i32 7, i32 7, i32 26, i32 19, i32 25, i32 19, i32 21, i32 31, i32 30, i32 29, i32 16, i32 18, i32 20, i32 28, i32 29, i32 25, i32 27, i32 28>
- %cmp = icmp eq <32 x i8> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
- ret <32 x i8> %res
-}
-
-define <32 x i8> @test_masked_z_32xi8_perm_mem_mask2(<32 x i8>* %vp, <32 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
-; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
-; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i8>, <32 x i8>* %vp
- %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 6, i32 8, i32 2, i32 15, i32 15, i32 2, i32 6, i32 10, i32 14, i32 7, i32 14, i32 5, i32 7, i32 7, i32 26, i32 19, i32 25, i32 19, i32 21, i32 31, i32 30, i32 29, i32 16, i32 18, i32 20, i32 28, i32 29, i32 25, i32 27, i32 28>
- %cmp = icmp eq <32 x i8> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
- ret <32 x i8> %res
-}
-
-define <32 x i8> @test_32xi8_perm_mem_mask3(<32 x i8>* %vp) {
-; GENERIC-LABEL: test_32xi8_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_32xi8_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %ymm0 # sched: [7:0.50]
-; SKX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i8>, <32 x i8>* %vp
- %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29>
- ret <32 x i8> %res
-}
-define <32 x i8> @test_masked_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %vec2, <32 x i8> %mask) {
-; GENERIC-LABEL: test_masked_32xi8_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
-; GENERIC-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi8_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %ymm2 # sched: [7:0.50]
-; SKX-NEXT: vptestnmb %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i8>, <32 x i8>* %vp
- %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29>
- %cmp = icmp eq <32 x i8> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
- ret <32 x i8> %res
-}
-
-define <32 x i8> @test_masked_z_32xi8_perm_mem_mask3(<32 x i8>* %vp, <32 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi8_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
-; GENERIC-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi8_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %ymm1 # sched: [7:0.50]
-; SKX-NEXT: vptestnmb %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i8>, <32 x i8>* %vp
- %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29>
- %cmp = icmp eq <32 x i8> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
- ret <32 x i8> %res
-}
-
-define <64 x i8> @test_64xi8_perm_mask0(<64 x i8> %vec) {
-; GENERIC-LABEL: test_64xi8_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_64xi8_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62>
- ret <64 x i8> %res
-}
-define <64 x i8> @test_masked_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
-; GENERIC-LABEL: test_masked_64xi8_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:0.50]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_64xi8_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62>
- %cmp = icmp eq <64 x i8> %mask, zeroinitializer
- %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
- ret <64 x i8> %res
-}
-
-define <64 x i8> @test_masked_z_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_64xi8_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_64xi8_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62>
- %cmp = icmp eq <64 x i8> %mask, zeroinitializer
- %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
- ret <64 x i8> %res
-}
-define <64 x i8> @test_masked_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
-; GENERIC-LABEL: test_masked_64xi8_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:0.50]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_64xi8_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 7, i32 14, i32 15, i32 10, i32 9, i32 3, i32 1, i32 13, i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 30, i32 30, i32 22, i32 17, i32 28, i32 27, i32 16, i32 23, i32 26, i32 16, i32 30, i32 31, i32 27, i32 17, i32 17, i32 21, i32 32, i32 37, i32 32, i32 47, i32 45, i32 33, i32 46, i32 35, i32 35, i32 42, i32 47, i32 33, i32 32, i32 37, i32 32, i32 41, i32 61, i32 50, i32 49, i32 53, i32 63, i32 50, i32 63, i32 53, i32 55, i32 52, i32 62, i32 63, i32 58, i32 50, i32 63, i32 49>
- %cmp = icmp eq <64 x i8> %mask, zeroinitializer
- %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
- ret <64 x i8> %res
-}
-
-define <64 x i8> @test_masked_z_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_64xi8_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_64xi8_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 7, i32 14, i32 15, i32 10, i32 9, i32 3, i32 1, i32 13, i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 30, i32 30, i32 22, i32 17, i32 28, i32 27, i32 16, i32 23, i32 26, i32 16, i32 30, i32 31, i32 27, i32 17, i32 17, i32 21, i32 32, i32 37, i32 32, i32 47, i32 45, i32 33, i32 46, i32 35, i32 35, i32 42, i32 47, i32 33, i32 32, i32 37, i32 32, i32 41, i32 61, i32 50, i32 49, i32 53, i32 63, i32 50, i32 63, i32 53, i32 55, i32 52, i32 62, i32 63, i32 58, i32 50, i32 63, i32 49>
- %cmp = icmp eq <64 x i8> %mask, zeroinitializer
- %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
- ret <64 x i8> %res
-}
-define <64 x i8> @test_masked_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
-; GENERIC-LABEL: test_masked_64xi8_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:0.50]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_64xi8_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12, i32 4, i32 6, i32 0, i32 2, i32 0, i32 1, i32 1, i32 6, i32 24, i32 27, i32 18, i32 22, i32 26, i32 17, i32 23, i32 21, i32 31, i32 16, i32 22, i32 22, i32 27, i32 21, i32 19, i32 20, i32 39, i32 47, i32 44, i32 36, i32 40, i32 43, i32 44, i32 39, i32 38, i32 44, i32 38, i32 35, i32 39, i32 46, i32 34, i32 39, i32 58, i32 55, i32 51, i32 48, i32 59, i32 57, i32 48, i32 52, i32 60, i32 58, i32 56, i32 50, i32 59, i32 55, i32 58, i32 60>
- %cmp = icmp eq <64 x i8> %mask, zeroinitializer
- %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
- ret <64 x i8> %res
-}
-
-define <64 x i8> @test_masked_z_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_64xi8_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_64xi8_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12, i32 4, i32 6, i32 0, i32 2, i32 0, i32 1, i32 1, i32 6, i32 24, i32 27, i32 18, i32 22, i32 26, i32 17, i32 23, i32 21, i32 31, i32 16, i32 22, i32 22, i32 27, i32 21, i32 19, i32 20, i32 39, i32 47, i32 44, i32 36, i32 40, i32 43, i32 44, i32 39, i32 38, i32 44, i32 38, i32 35, i32 39, i32 46, i32 34, i32 39, i32 58, i32 55, i32 51, i32 48, i32 59, i32 57, i32 48, i32 52, i32 60, i32 58, i32 56, i32 50, i32 59, i32 55, i32 58, i32 60>
- %cmp = icmp eq <64 x i8> %mask, zeroinitializer
- %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
- ret <64 x i8> %res
-}
-define <64 x i8> @test_64xi8_perm_mask3(<64 x i8> %vec) {
-; GENERIC-LABEL: test_64xi8_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_64xi8_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61>
- ret <64 x i8> %res
-}
-define <64 x i8> @test_masked_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
-; GENERIC-LABEL: test_masked_64xi8_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:0.50]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_64xi8_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61>
- %cmp = icmp eq <64 x i8> %mask, zeroinitializer
- %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
- ret <64 x i8> %res
-}
-
-define <64 x i8> @test_masked_z_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_64xi8_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_64xi8_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61>
- %cmp = icmp eq <64 x i8> %mask, zeroinitializer
- %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
- ret <64 x i8> %res
-}
-define <64 x i8> @test_64xi8_perm_mem_mask0(<64 x i8>* %vp) {
-; GENERIC-LABEL: test_64xi8_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_64xi8_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <64 x i8>, <64 x i8>* %vp
- %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58>
- ret <64 x i8> %res
-}
-define <64 x i8> @test_masked_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) {
-; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50]
-; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_64xi8_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [8:0.50]
-; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <64 x i8>, <64 x i8>* %vp
- %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58>
- %cmp = icmp eq <64 x i8> %mask, zeroinitializer
- %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
- ret <64 x i8> %res
-}
-
-define <64 x i8> @test_masked_z_64xi8_perm_mem_mask0(<64 x i8>* %vp, <64 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50]
-; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50]
-; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <64 x i8>, <64 x i8>* %vp
- %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58>
- %cmp = icmp eq <64 x i8> %mask, zeroinitializer
- %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
- ret <64 x i8> %res
-}
-
-define <64 x i8> @test_masked_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) {
-; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50]
-; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_64xi8_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [8:0.50]
-; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <64 x i8>, <64 x i8>* %vp
- %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 15, i32 6, i32 14, i32 7, i32 5, i32 1, i32 14, i32 12, i32 5, i32 7, i32 5, i32 0, i32 0, i32 5, i32 3, i32 8, i32 19, i32 19, i32 26, i32 27, i32 20, i32 29, i32 20, i32 21, i32 27, i32 16, i32 30, i32 17, i32 23, i32 27, i32 16, i32 28, i32 47, i32 39, i32 33, i32 33, i32 33, i32 44, i32 38, i32 46, i32 39, i32 33, i32 38, i32 44, i32 45, i32 32, i32 34, i32 39, i32 50, i32 61, i32 62, i32 53, i32 54, i32 56, i32 52, i32 56, i32 51, i32 52, i32 55, i32 57, i32 56, i32 52, i32 51, i32 49>
- %cmp = icmp eq <64 x i8> %mask, zeroinitializer
- %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
- ret <64 x i8> %res
-}
-
-define <64 x i8> @test_masked_z_64xi8_perm_mem_mask1(<64 x i8>* %vp, <64 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50]
-; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50]
-; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <64 x i8>, <64 x i8>* %vp
- %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 15, i32 6, i32 14, i32 7, i32 5, i32 1, i32 14, i32 12, i32 5, i32 7, i32 5, i32 0, i32 0, i32 5, i32 3, i32 8, i32 19, i32 19, i32 26, i32 27, i32 20, i32 29, i32 20, i32 21, i32 27, i32 16, i32 30, i32 17, i32 23, i32 27, i32 16, i32 28, i32 47, i32 39, i32 33, i32 33, i32 33, i32 44, i32 38, i32 46, i32 39, i32 33, i32 38, i32 44, i32 45, i32 32, i32 34, i32 39, i32 50, i32 61, i32 62, i32 53, i32 54, i32 56, i32 52, i32 56, i32 51, i32 52, i32 55, i32 57, i32 56, i32 52, i32 51, i32 49>
- %cmp = icmp eq <64 x i8> %mask, zeroinitializer
- %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
- ret <64 x i8> %res
-}
-
-define <64 x i8> @test_masked_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) {
-; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50]
-; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_64xi8_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [8:0.50]
-; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <64 x i8>, <64 x i8>* %vp
- %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 12, i32 1, i32 11, i32 3, i32 4, i32 11, i32 10, i32 11, i32 8, i32 13, i32 1, i32 10, i32 1, i32 11, i32 5, i32 10, i32 27, i32 26, i32 19, i32 29, i32 19, i32 24, i32 26, i32 19, i32 26, i32 20, i32 18, i32 28, i32 24, i32 21, i32 25, i32 16, i32 34, i32 38, i32 47, i32 40, i32 33, i32 44, i32 44, i32 44, i32 41, i32 43, i32 35, i32 43, i32 45, i32 44, i32 37, i32 41, i32 58, i32 62, i32 49, i32 61, i32 56, i32 53, i32 55, i32 48, i32 51, i32 58, i32 58, i32 55, i32 63, i32 55, i32 53, i32 61>
- %cmp = icmp eq <64 x i8> %mask, zeroinitializer
- %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
- ret <64 x i8> %res
-}
-
-define <64 x i8> @test_masked_z_64xi8_perm_mem_mask2(<64 x i8>* %vp, <64 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50]
-; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50]
-; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <64 x i8>, <64 x i8>* %vp
- %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 12, i32 1, i32 11, i32 3, i32 4, i32 11, i32 10, i32 11, i32 8, i32 13, i32 1, i32 10, i32 1, i32 11, i32 5, i32 10, i32 27, i32 26, i32 19, i32 29, i32 19, i32 24, i32 26, i32 19, i32 26, i32 20, i32 18, i32 28, i32 24, i32 21, i32 25, i32 16, i32 34, i32 38, i32 47, i32 40, i32 33, i32 44, i32 44, i32 44, i32 41, i32 43, i32 35, i32 43, i32 45, i32 44, i32 37, i32 41, i32 58, i32 62, i32 49, i32 61, i32 56, i32 53, i32 55, i32 48, i32 51, i32 58, i32 58, i32 55, i32 63, i32 55, i32 53, i32 61>
- %cmp = icmp eq <64 x i8> %mask, zeroinitializer
- %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
- ret <64 x i8> %res
-}
-
-define <64 x i8> @test_64xi8_perm_mem_mask3(<64 x i8>* %vp) {
-; GENERIC-LABEL: test_64xi8_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_64xi8_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 (%rdi), %zmm0 # sched: [8:0.50]
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <64 x i8>, <64 x i8>* %vp
- %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60>
- ret <64 x i8> %res
-}
-define <64 x i8> @test_masked_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %vec2, <64 x i8> %mask) {
-; GENERIC-LABEL: test_masked_64xi8_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [7:0.50]
-; GENERIC-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_64xi8_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 (%rdi), %zmm2 # sched: [8:0.50]
-; SKX-NEXT: vptestnmb %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <64 x i8>, <64 x i8>* %vp
- %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60>
- %cmp = icmp eq <64 x i8> %mask, zeroinitializer
- %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
- ret <64 x i8> %res
-}
-
-define <64 x i8> @test_masked_z_64xi8_perm_mem_mask3(<64 x i8>* %vp, <64 x i8> %mask) {
-; GENERIC-LABEL: test_masked_z_64xi8_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [7:0.50]
-; GENERIC-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_64xi8_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa64 (%rdi), %zmm1 # sched: [8:0.50]
-; SKX-NEXT: vptestnmb %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <64 x i8>, <64 x i8>* %vp
- %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60>
- %cmp = icmp eq <64 x i8> %mask, zeroinitializer
- %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
- ret <64 x i8> %res
-}
-
-define <8 x i16> @test_8xi16_perm_high_mask0(<8 x i16> %vec) {
-; GENERIC-LABEL: test_8xi16_perm_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi16_perm_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6>
- ret <8 x i16> %res
-}
-define <8 x i16> @test_masked_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_8xi16_perm_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi16_perm_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_z_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi16_perm_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
- ret <8 x i16> %res
-}
-define <8 x i16> @test_masked_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_8xi16_perm_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi16_perm_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_z_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi16_perm_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
- ret <8 x i16> %res
-}
-define <8 x i16> @test_masked_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_8xi16_perm_high_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi16_perm_high_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 4, i32 5>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_z_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi16_perm_high_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 4, i32 5>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
- ret <8 x i16> %res
-}
-define <8 x i16> @test_8xi16_perm_low_mask3(<8 x i16> %vec) {
-; GENERIC-LABEL: test_8xi16_perm_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi16_perm_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %res
-}
-define <8 x i16> @test_masked_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_8xi16_perm_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi16_perm_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_z_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi16_perm_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
- ret <8 x i16> %res
-}
-define <8 x i16> @test_masked_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_8xi16_perm_high_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi16_perm_high_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 7, i32 6>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_z_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi16_perm_high_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 7, i32 6>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
- ret <8 x i16> %res
-}
-define <8 x i16> @test_masked_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_8xi16_perm_low_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi16_perm_low_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 1, i32 4, i32 5, i32 6, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_z_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi16_perm_low_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 1, i32 4, i32 5, i32 6, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
- ret <8 x i16> %res
-}
-define <8 x i16> @test_8xi16_perm_high_mask6(<8 x i16> %vec) {
-; GENERIC-LABEL: test_8xi16_perm_high_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi16_perm_high_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5>
- ret <8 x i16> %res
-}
-define <8 x i16> @test_masked_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_8xi16_perm_high_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi16_perm_high_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_z_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi16_perm_high_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
- ret <8 x i16> %res
-}
-define <8 x i16> @test_masked_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_8xi16_perm_low_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi16_perm_low_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_z_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi16_perm_low_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
- ret <8 x i16> %res
-}
-define <8 x i16> @test_8xi16_perm_high_mem_mask0(<8 x i16>* %vp) {
-; GENERIC-LABEL: test_8xi16_perm_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,7,4,6] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi16_perm_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,7,4,6] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6>
- ret <8 x i16> %res
-}
-define <8 x i16> @test_masked_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask0(<8 x i16>* %vp, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask1(<8 x i16>* %vp, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 6, i32 5, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask2(<8 x i16>* %vp, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 6, i32 5, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_8xi16_perm_low_mem_mask3(<8 x i16>* %vp) {
-; GENERIC-LABEL: test_8xi16_perm_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 = mem[3,1,2,0,4,5,6,7] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi16_perm_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = mem[3,1,2,0,4,5,6,7] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i16> %res
-}
-define <8 x i16> @test_masked_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask3(<8 x i16>* %vp, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 7, i32 5>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask4(<8 x i16>* %vp, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 7, i32 5>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask5(<8 x i16>* %vp, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_8xi16_perm_high_mem_mask6(<8 x i16>* %vp) {
-; GENERIC-LABEL: test_8xi16_perm_high_mem_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,4,4,4] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi16_perm_high_mem_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,4,4,4] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4>
- ret <8 x i16> %res
-}
-define <8 x i16> @test_masked_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_8xi16_perm_high_mem_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi16_perm_high_mem_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask6(<8 x i16>* %vp, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi16_perm_high_mem_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi16_perm_high_mem_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %vec2, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_8xi16_perm_low_mem_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_8xi16_perm_low_mem_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 1, i32 4, i32 5, i32 6, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
- ret <8 x i16> %res
-}
-
-define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask7(<8 x i16>* %vp, <8 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_8xi16_perm_low_mem_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_8xi16_perm_low_mem_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i16>, <8 x i16>* %vp
- %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 1, i32 4, i32 5, i32 6, i32 7>
- %cmp = icmp eq <8 x i16> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
- ret <8 x i16> %res
-}
-
-define <16 x i16> @test_16xi16_perm_high_mask0(<16 x i16> %vec) {
-; GENERIC-LABEL: test_16xi16_perm_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi16_perm_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12>
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_high_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_high_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 5, i32 5, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 13, i32 13, i32 13>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_high_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 5, i32 5, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 13, i32 13, i32 13>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-define <16 x i16> @test_16xi16_perm_low_mask3(<16 x i16> %vec) {
-; GENERIC-LABEL: test_16xi16_perm_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi16_perm_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_high_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_high_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 12, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_high_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 12, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_low_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_low_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_low_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-define <16 x i16> @test_16xi16_perm_high_mask6(<16 x i16> %vec) {
-; GENERIC-LABEL: test_16xi16_perm_high_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi16_perm_high_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13>
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_high_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_high_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_high_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_low_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_low_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 10, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_low_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 10, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-define <16 x i16> @test_16xi16_perm_high_mem_mask0(<16 x i16>* %vp) {
-; GENERIC-LABEL: test_16xi16_perm_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi16_perm_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15>
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 13, i32 14>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 13, i32 14>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_16xi16_perm_low_mem_mask3(<16 x i16>* %vp) {
-; GENERIC-LABEL: test_16xi16_perm_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi16_perm_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask4(<16 x i16>* %vp, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask5(<16 x i16>* %vp, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_16xi16_perm_high_mem_mask6(<16 x i16>* %vp) {
-; GENERIC-LABEL: test_16xi16_perm_high_mem_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi16_perm_high_mem_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13>
- ret <16 x i16> %res
-}
-define <16 x i16> @test_masked_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_high_mem_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_high_mem_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask6(<16 x i16>* %vp, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_high_mem_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_high_mem_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_16xi16_perm_low_mem_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_16xi16_perm_low_mem_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
- ret <16 x i16> %res
-}
-
-define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask7(<16 x i16>* %vp, <16 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_16xi16_perm_low_mem_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_16xi16_perm_low_mem_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i16>, <16 x i16>* %vp
- %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <16 x i16> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
- ret <16 x i16> %res
-}
-
-define <32 x i16> @test_32xi16_perm_high_mask0(<32 x i16> %vec) {
-; GENERIC-LABEL: test_32xi16_perm_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_32xi16_perm_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28>
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_high_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_high_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 14, i32 12, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 22, i32 20, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 30, i32 28, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_high_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 14, i32 12, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 22, i32 20, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 30, i32 28, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-define <32 x i16> @test_32xi16_perm_low_mask3(<32 x i16> %vec) {
-; GENERIC-LABEL: test_32xi16_perm_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_32xi16_perm_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31>
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_high_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_high_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 23, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 31, i32 29, i32 30>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_high_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 23, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 31, i32 29, i32 30>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_low_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_low_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 17, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 25, i32 24, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_low_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 17, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 25, i32 24, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-define <32 x i16> @test_32xi16_perm_high_mask6(<32 x i16> %vec) {
-; GENERIC-LABEL: test_32xi16_perm_high_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_32xi16_perm_high_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30>
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_high_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_high_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_high_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_low_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_low_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 0, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 16, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 24, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_low_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 0, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 16, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 24, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-define <32 x i16> @test_32xi16_perm_high_mem_mask0(<32 x i16>* %vp) {
-; GENERIC-LABEL: test_32xi16_perm_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_32xi16_perm_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30>
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 11, i32 11, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 19, i32 19, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 27, i32 27, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 11, i32 11, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 19, i32 19, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 27, i32 27, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 7, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 15, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 23, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 31, i32 30, i32 28>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 7, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 15, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 23, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 31, i32 30, i32 28>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_32xi16_perm_low_mem_mask3(<32 x i16>* %vp) {
-; GENERIC-LABEL: test_32xi16_perm_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_32xi16_perm_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31>
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 14, i32 13, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 22, i32 21, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 30, i32 29>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask4(<32 x i16>* %vp, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask4:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 14, i32 13, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 22, i32 21, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 30, i32 29>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00]
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmovdqu16 %zmm2, %zmm0 {%k1} # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00]
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovdqu16 %zmm2, %zmm0 {%k1} # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 16, i32 17, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 24, i32 25, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask5(<32 x i16>* %vp, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask5:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00]
-; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask5:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15] sched: [8:1.00]
-; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 16, i32 17, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 24, i32 25, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_32xi16_perm_high_mem_mask6(<32 x i16>* %vp) {
-; GENERIC-LABEL: test_32xi16_perm_high_mem_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_32xi16_perm_high_mem_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30>
- ret <32 x i16> %res
-}
-define <32 x i16> @test_masked_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_high_mem_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_high_mem_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask6(<32 x i16>* %vp, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_high_mem_mask6:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_high_mem_mask6:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_32xi16_perm_low_mem_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_32xi16_perm_low_mem_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 1, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 17, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 25, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
- ret <32 x i16> %res
-}
-
-define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask7(<32 x i16>* %vp, <32 x i16> %mask) {
-; GENERIC-LABEL: test_masked_z_32xi16_perm_low_mem_mask7:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_32xi16_perm_low_mem_mask7:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmw %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <32 x i16>, <32 x i16>* %vp
- %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 1, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 17, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 25, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <32 x i16> %mask, zeroinitializer
- %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
- ret <32 x i16> %res
-}
-
-define <4 x i32> @test_4xi32_perm_mask0(<4 x i32> %vec) {
-; GENERIC-LABEL: test_4xi32_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,3,0] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi32_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,3,0] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0>
- ret <4 x i32> %res
-}
-define <4 x i32> @test_masked_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
-; GENERIC-LABEL: test_masked_4xi32_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xi32_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
- ret <4 x i32> %res
-}
-
-define <4 x i32> @test_masked_z_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_4xi32_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xi32_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
- ret <4 x i32> %res
-}
-define <4 x i32> @test_masked_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
-; GENERIC-LABEL: test_masked_4xi32_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xi32_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 0>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
- ret <4 x i32> %res
-}
-
-define <4 x i32> @test_masked_z_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_4xi32_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xi32_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 0>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
- ret <4 x i32> %res
-}
-define <4 x i32> @test_masked_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
-; GENERIC-LABEL: test_masked_4xi32_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xi32_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 0>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
- ret <4 x i32> %res
-}
-
-define <4 x i32> @test_masked_z_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_4xi32_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xi32_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 0>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
- ret <4 x i32> %res
-}
-define <4 x i32> @test_4xi32_perm_mask3(<4 x i32> %vec) {
-; GENERIC-LABEL: test_4xi32_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi32_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,0,3] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
- ret <4 x i32> %res
-}
-define <4 x i32> @test_masked_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
-; GENERIC-LABEL: test_masked_4xi32_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:0.50]
-; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xi32_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
- ret <4 x i32> %res
-}
-
-define <4 x i32> @test_masked_z_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_4xi32_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xi32_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
- ret <4 x i32> %res
-}
-define <4 x i32> @test_4xi32_perm_mem_mask0(<4 x i32>* %vp) {
-; GENERIC-LABEL: test_4xi32_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,3,3] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi32_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,3,3] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i32>, <4 x i32>* %vp
- %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
- ret <4 x i32> %res
-}
-define <4 x i32> @test_masked_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
-; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xi32_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i32>, <4 x i32>* %vp
- %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
- ret <4 x i32> %res
-}
-
-define <4 x i32> @test_masked_z_4xi32_perm_mem_mask0(<4 x i32>* %vp, <4 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i32>, <4 x i32>* %vp
- %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
- ret <4 x i32> %res
-}
-
-define <4 x i32> @test_masked_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
-; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xi32_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i32>, <4 x i32>* %vp
- %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 1>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
- ret <4 x i32> %res
-}
-
-define <4 x i32> @test_masked_z_4xi32_perm_mem_mask1(<4 x i32>* %vp, <4 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i32>, <4 x i32>* %vp
- %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 1>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
- ret <4 x i32> %res
-}
-
-define <4 x i32> @test_masked_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
-; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xi32_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i32>, <4 x i32>* %vp
- %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 0, i32 1>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
- ret <4 x i32> %res
-}
-
-define <4 x i32> @test_masked_z_4xi32_perm_mem_mask2(<4 x i32>* %vp, <4 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i32>, <4 x i32>* %vp
- %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 0, i32 1>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
- ret <4 x i32> %res
-}
-
-define <4 x i32> @test_4xi32_perm_mem_mask3(<4 x i32>* %vp) {
-; GENERIC-LABEL: test_4xi32_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi32_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i32>, <4 x i32>* %vp
- %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
- ret <4 x i32> %res
-}
-define <4 x i32> @test_masked_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %vec2, <4 x i32> %mask) {
-; GENERIC-LABEL: test_masked_4xi32_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_4xi32_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i32>, <4 x i32>* %vp
- %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
- ret <4 x i32> %res
-}
-
-define <4 x i32> @test_masked_z_4xi32_perm_mem_mask3(<4 x i32>* %vp, <4 x i32> %mask) {
-; GENERIC-LABEL: test_masked_z_4xi32_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_masked_z_4xi32_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm0, %xmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <4 x i32>, <4 x i32>* %vp
- %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
- ret <4 x i32> %res
-}
-
-define <8 x i32> @test2_8xi32_perm_mask0(<8 x i32> %vec) {
-; GENERIC-LABEL: test2_8xi32_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_8xi32_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4>
- ret <8 x i32> %res
-}
-define <8 x i32> @test2_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_8xi32_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_8xi32_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test2_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_z_8xi32_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-define <8 x i32> @test2_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_8xi32_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_8xi32_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 3, i32 4, i32 7, i32 7, i32 7>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test2_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_z_8xi32_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 3, i32 4, i32 7, i32 7, i32 7>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-define <8 x i32> @test2_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_8xi32_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_8xi32_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 0, i32 3, i32 5, i32 6, i32 4, i32 7>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test2_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_z_8xi32_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 0, i32 3, i32 5, i32 6, i32 4, i32 7>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-define <8 x i32> @test2_8xi32_perm_mask3(<8 x i32> %vec) {
-; GENERIC-LABEL: test2_8xi32_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_8xi32_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4>
- ret <8 x i32> %res
-}
-define <8 x i32> @test2_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_8xi32_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_8xi32_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test2_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_z_8xi32_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_z_8xi32_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-define <8 x i32> @test2_8xi32_perm_mem_mask0(<8 x i32>* %vp) {
-; GENERIC-LABEL: test2_8xi32_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_8xi32_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilps {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4>
- ret <8 x i32> %res
-}
-define <8 x i32> @test2_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_8xi32_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test2_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_8xi32_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test2_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_8xi32_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 1, i32 7, i32 6, i32 7, i32 5>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 1, i32 7, i32 6, i32 7, i32 5>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test2_8xi32_perm_mem_mask3(<8 x i32>* %vp) {
-; GENERIC-LABEL: test2_8xi32_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_8xi32_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4>
- ret <8 x i32> %res
-}
-define <8 x i32> @test2_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_8xi32_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_8xi32_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test2_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_z_8xi32_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_z_8xi32_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm0, %ymm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <8 x i32>, <8 x i32>* %vp
- %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-
-define <16 x i32> @test2_16xi32_perm_mask0(<16 x i32> %vec) {
-; GENERIC-LABEL: test2_16xi32_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_16xi32_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12>
- ret <16 x i32> %res
-}
-define <16 x i32> @test2_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_16xi32_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_16xi32_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test2_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_z_16xi32_perm_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-define <16 x i32> @test2_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_16xi32_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_16xi32_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 0, i32 6, i32 4, i32 7, i32 4, i32 10, i32 8, i32 11, i32 8, i32 14, i32 12, i32 15, i32 12>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test2_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_z_16xi32_perm_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 0, i32 6, i32 4, i32 7, i32 4, i32 10, i32 8, i32 11, i32 8, i32 14, i32 12, i32 15, i32 12>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-define <16 x i32> @test2_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_16xi32_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_16xi32_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4, i32 9, i32 11, i32 11, i32 8, i32 13, i32 15, i32 15, i32 12>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test2_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_z_16xi32_perm_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4, i32 9, i32 11, i32 11, i32 8, i32 13, i32 15, i32 15, i32 12>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-define <16 x i32> @test2_16xi32_perm_mask3(<16 x i32> %vec) {
-; GENERIC-LABEL: test2_16xi32_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_16xi32_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15>
- ret <16 x i32> %res
-}
-define <16 x i32> @test2_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_16xi32_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_16xi32_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test2_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_z_16xi32_perm_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_z_16xi32_perm_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-define <16 x i32> @test2_16xi32_perm_mem_mask0(<16 x i32>* %vp) {
-; GENERIC-LABEL: test2_16xi32_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_16xi32_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15>
- ret <16 x i32> %res
-}
-define <16 x i32> @test2_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_16xi32_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test2_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_16xi32_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 2, i32 5, i32 4, i32 4, i32 6, i32 9, i32 8, i32 8, i32 10, i32 13, i32 12, i32 12, i32 14>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 2, i32 5, i32 4, i32 4, i32 6, i32 9, i32 8, i32 8, i32 10, i32 13, i32 12, i32 12, i32 14>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test2_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_16xi32_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 1, i32 2, i32 6, i32 4, i32 5, i32 6, i32 10, i32 8, i32 9, i32 10, i32 14, i32 12, i32 13, i32 14>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 1, i32 2, i32 6, i32 4, i32 5, i32 6, i32 10, i32 8, i32 9, i32 10, i32 14, i32 12, i32 13, i32 14>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test2_16xi32_perm_mem_mask3(<16 x i32>* %vp) {
-; GENERIC-LABEL: test2_16xi32_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_16xi32_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13>
- ret <16 x i32> %res
-}
-define <16 x i32> @test2_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_16xi32_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_16xi32_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test2_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) {
-; GENERIC-LABEL: test2_masked_z_16xi32_perm_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_masked_z_16xi32_perm_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm0, %zmm0, %k1 # sched: [3:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec = load <16 x i32>, <16 x i32>* %vp
- %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-
-define <8 x float> @test2_8xfloat_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2) {
-; GENERIC-LABEL: test2_8xfloat_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_8xfloat_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- ret <8 x float> %res
-}
-define <8 x float> @test2_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_8xfloat_masked_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00]
-; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test2_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-define <8 x float> @test2_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_8xfloat_masked_shuff_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00]
-; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test2_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-define <8 x float> @test2_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_8xfloat_masked_shuff_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00]
-; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test2_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_8xfloat_zero_masked_shuff_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-define <8 x float> @test2_8xfloat_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2) {
-; GENERIC-LABEL: test2_8xfloat_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_8xfloat_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- ret <8 x float> %res
-}
-define <8 x float> @test2_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test2_8xfloat_masked_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test2_8xfloat_masked_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00]
-; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) {
-; GENERIC-LABEL: test_8xfloat_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00]
-; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_shuff_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00]
-; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_shuff_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00]
-; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) {
-; GENERIC-LABEL: test_8xfloat_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00]
-; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-
-define <16 x float> @test_16xfloat_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],zmm1[2,3,6,7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],zmm1[2,3,6,7] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00]
-; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_shuff_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_shuff_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [3:1.00]
-; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_shuff_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_shuff_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [3:1.00]
-; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2) {
-; GENERIC-LABEL: test_16xfloat_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7],zmm1[0,1,4,5] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7],zmm1[0,1,4,5] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [3:1.00]
-; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
-; GENERIC-LABEL: test_16xfloat_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5],mem[4,5,2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5],mem[4,5,2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [10:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_shuff_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [10:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_shuff_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [10:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 24, i32 25, i32 26, i32 27>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 24, i32 25, i32 26, i32 27>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
-; GENERIC-LABEL: test_16xfloat_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,6,7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,6,7] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [10:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-
-define <4 x double> @test_4xdouble_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2) {
-; GENERIC-LABEL: test_4xdouble_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_shuff_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_shuff_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_shuff_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_shuff_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
-; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2) {
-; GENERIC-LABEL: test_4xdouble_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
-; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) {
-; GENERIC-LABEL: test_4xdouble_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [10:1.00]
-; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_shuff_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_shuff_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) {
-; GENERIC-LABEL: test_4xdouble_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [10:1.00]
-; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-
-define <8 x double> @test_8xdouble_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2) {
-; GENERIC-LABEL: test_8xdouble_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 2, i32 3, i32 14, i32 15, i32 8, i32 9>
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [3:1.00]
-; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 2, i32 3, i32 14, i32 15, i32 8, i32 9>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 2, i32 3, i32 14, i32 15, i32 8, i32 9>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_shuff_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_shuff_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [3:1.00]
-; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_shuff_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_shuff_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [3:1.00]
-; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 12, i32 13, i32 8, i32 9>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 12, i32 13, i32 8, i32 9>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2) {
-; GENERIC-LABEL: test_8xdouble_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 10, i32 11>
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [3:1.00]
-; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 10, i32 11>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 10, i32 11>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
-; GENERIC-LABEL: test_8xdouble_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9>
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [10:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_shuff_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [10:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_shuff_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [10:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 12, i32 13>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 12, i32 13>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
-; GENERIC-LABEL: test_8xdouble_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 12, i32 13, i32 8, i32 9>
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [10:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 12, i32 13, i32 8, i32 9>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 12, i32 13, i32 8, i32 9>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x i32> @test_8xi32_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2) {
-; GENERIC-LABEL: test_8xi32_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
- ret <8 x i32> %res
-}
-define <8 x i32> @test_8xi32_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xi32_masked_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_masked_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00]
-; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_8xi32_zero_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_zero_masked_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-define <8 x i32> @test_8xi32_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xi32_masked_shuff_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_masked_shuff_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00]
-; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_8xi32_zero_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_zero_masked_shuff_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-define <8 x i32> @test_8xi32_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xi32_masked_shuff_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_masked_shuff_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00]
-; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_8xi32_zero_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_zero_masked_shuff_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-define <8 x i32> @test_8xi32_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2) {
-; GENERIC-LABEL: test_8xi32_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- ret <8 x i32> %res
-}
-define <8 x i32> @test_8xi32_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xi32_masked_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_masked_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00]
-; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_8xi32_zero_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_zero_masked_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-define <8 x i32> @test_8xi32_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p) {
-; GENERIC-LABEL: test_8xi32_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i32>, <8 x i32>* %vec2p
- %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
- ret <8 x i32> %res
-}
-define <8 x i32> @test_8xi32_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_masked_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i32>, <8 x i32>* %vec2p
- %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i32>, <8 x i32>* %vec2p
- %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_8xi32_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_masked_shuff_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i32>, <8 x i32>* %vec2p
- %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i32>, <8 x i32>* %vec2p
- %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_8xi32_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_masked_shuff_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i32>, <8 x i32>* %vec2p
- %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i32>, <8 x i32>* %vec2p
- %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_8xi32_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p) {
-; GENERIC-LABEL: test_8xi32_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i32>, <8 x i32>* %vec2p
- %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- ret <8 x i32> %res
-}
-define <8 x i32> @test_8xi32_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xi32_masked_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_masked_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i32>, <8 x i32>* %vec2p
- %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
- ret <8 x i32> %res
-}
-
-define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xi32_zero_masked_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i32>, <8 x i32>* %vec2p
- %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
- ret <8 x i32> %res
-}
-
-define <16 x i32> @test_16xi32_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2) {
-; GENERIC-LABEL: test_16xi32_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],zmm1[2,3,6,7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],zmm1[2,3,6,7] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
- ret <16 x i32> %res
-}
-define <16 x i32> @test_16xi32_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_masked_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_masked_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_16xi32_zero_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_zero_masked_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-define <16 x i32> @test_16xi32_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_masked_shuff_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_masked_shuff_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_16xi32_zero_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_zero_masked_shuff_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-define <16 x i32> @test_16xi32_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_masked_shuff_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_masked_shuff_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_16xi32_zero_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_zero_masked_shuff_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-define <16 x i32> @test_16xi32_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2) {
-; GENERIC-LABEL: test_16xi32_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],zmm1[4,5,2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],zmm1[4,5,2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
- ret <16 x i32> %res
-}
-define <16 x i32> @test_16xi32_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_masked_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_masked_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_16xi32_zero_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_zero_masked_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-define <16 x i32> @test_16xi32_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p) {
-; GENERIC-LABEL: test_16xi32_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],mem[4,5,0,1] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],mem[4,5,0,1] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x i32>, <16 x i32>* %vec2p
- %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 16, i32 17, i32 18, i32 19>
- ret <16 x i32> %res
-}
-define <16 x i32> @test_16xi32_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_masked_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [10:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x i32>, <16 x i32>* %vec2p
- %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 16, i32 17, i32 18, i32 19>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x i32>, <16 x i32>* %vec2p
- %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 16, i32 17, i32 18, i32 19>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_16xi32_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_masked_shuff_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [10:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x i32>, <16 x i32>* %vec2p
- %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x i32>, <16 x i32>* %vec2p
- %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_16xi32_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_masked_shuff_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [10:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x i32>, <16 x i32>* %vec2p
- %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x i32>, <16 x i32>* %vec2p
- %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_16xi32_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p) {
-; GENERIC-LABEL: test_16xi32_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[2,3,6,7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[2,3,6,7] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x i32>, <16 x i32>* %vec2p
- %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
- ret <16 x i32> %res
-}
-define <16 x i32> @test_16xi32_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_masked_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_masked_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [10:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x i32>, <16 x i32>* %vec2p
- %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xi32_zero_masked_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x i32>, <16 x i32>* %vec2p
- %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
- ret <16 x i32> %res
-}
-
-define <4 x i64> @test_4xi64_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2) {
-; GENERIC-LABEL: test_4xi64_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- ret <4 x i64> %res
-}
-define <4 x i64> @test_4xi64_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xi64_masked_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_masked_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_4xi64_zero_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_zero_masked_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
- ret <4 x i64> %res
-}
-define <4 x i64> @test_4xi64_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xi64_masked_shuff_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_masked_shuff_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
-; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_4xi64_zero_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_zero_masked_shuff_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
- ret <4 x i64> %res
-}
-define <4 x i64> @test_4xi64_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xi64_masked_shuff_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_masked_shuff_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_4xi64_zero_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_zero_masked_shuff_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
- ret <4 x i64> %res
-}
-define <4 x i64> @test_4xi64_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2) {
-; GENERIC-LABEL: test_4xi64_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- ret <4 x i64> %res
-}
-define <4 x i64> @test_4xi64_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xi64_masked_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_masked_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
-; SKX-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_4xi64_zero_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_zero_masked_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
- ret <4 x i64> %res
-}
-define <4 x i64> @test_4xi64_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p) {
-; GENERIC-LABEL: test_4xi64_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x i64>, <4 x i64>* %vec2p
- %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- ret <4 x i64> %res
-}
-define <4 x i64> @test_4xi64_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_masked_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [10:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x i64>, <4 x i64>* %vec2p
- %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x i64>, <4 x i64>* %vec2p
- %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_4xi64_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_masked_shuff_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x i64>, <4 x i64>* %vec2p
- %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x i64>, <4 x i64>* %vec2p
- %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_4xi64_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_masked_shuff_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x i64>, <4 x i64>* %vec2p
- %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x i64>, <4 x i64>* %vec2p
- %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_4xi64_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p) {
-; GENERIC-LABEL: test_4xi64_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x i64>, <4 x i64>* %vec2p
- %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- ret <4 x i64> %res
-}
-define <4 x i64> @test_4xi64_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xi64_masked_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_masked_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [10:1.00]
-; SKX-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x i64>, <4 x i64>* %vec2p
- %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3
- ret <4 x i64> %res
-}
-
-define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xi64_zero_masked_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x i64>, <4 x i64>* %vec2p
- %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
- ret <4 x i64> %res
-}
-
-define <8 x i64> @test_8xi64_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2) {
-; GENERIC-LABEL: test_8xi64_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 12, i32 13>
- ret <8 x i64> %res
-}
-define <8 x i64> @test_8xi64_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xi64_masked_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_masked_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 12, i32 13>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_8xi64_zero_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_zero_masked_shuff_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 12, i32 13>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-define <8 x i64> @test_8xi64_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xi64_masked_shuff_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_masked_shuff_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 12, i32 13>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_8xi64_zero_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_zero_masked_shuff_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 12, i32 13>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-define <8 x i64> @test_8xi64_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xi64_masked_shuff_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_masked_shuff_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 8, i32 9>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_8xi64_zero_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_zero_masked_shuff_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 8, i32 9>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-define <8 x i64> @test_8xi64_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2) {
-; GENERIC-LABEL: test_8xi64_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 12, i32 13, i32 10, i32 11>
- ret <8 x i64> %res
-}
-define <8 x i64> @test_8xi64_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xi64_masked_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_masked_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [3:1.00]
-; SKX-NEXT: vmovdqa64 %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 12, i32 13, i32 10, i32 11>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_8xi64_zero_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_zero_masked_shuff_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 12, i32 13, i32 10, i32 11>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-define <8 x i64> @test_8xi64_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p) {
-; GENERIC-LABEL: test_8xi64_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i64>, <8 x i64>* %vec2p
- %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 12, i32 13, i32 10, i32 11>
- ret <8 x i64> %res
-}
-define <8 x i64> @test_8xi64_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_masked_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [10:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i64>, <8 x i64>* %vec2p
- %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 12, i32 13, i32 10, i32 11>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i64>, <8 x i64>* %vec2p
- %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 12, i32 13, i32 10, i32 11>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_8xi64_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_masked_shuff_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [10:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i64>, <8 x i64>* %vec2p
- %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i64>, <8 x i64>* %vec2p
- %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_8xi64_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_masked_shuff_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [10:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i64>, <8 x i64>* %vec2p
- %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 0, i32 1, i32 10, i32 11, i32 10, i32 11>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i64>, <8 x i64>* %vec2p
- %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 0, i32 1, i32 10, i32 11, i32 10, i32 11>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_8xi64_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p) {
-; GENERIC-LABEL: test_8xi64_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i64>, <8 x i64>* %vec2p
- %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 14, i32 15, i32 10, i32 11>
- ret <8 x i64> %res
-}
-define <8 x i64> @test_8xi64_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xi64_masked_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00]
-; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_masked_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [10:1.00]
-; SKX-NEXT: vmovdqa64 %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i64>, <8 x i64>* %vec2p
- %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 14, i32 15, i32 10, i32 11>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xi64_zero_masked_shuff_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [10:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x i64>, <8 x i64>* %vec2p
- %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 14, i32 15, i32 10, i32 11>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
- ret <8 x i64> %res
-}
-
-define <4 x float> @test_4xfloat_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2) {
-; GENERIC-LABEL: test_4xfloat_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- ret <4 x float> %res
-}
-define <4 x float> @test_4xfloat_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_masked_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
- ret <4 x float> %res
-}
-define <4 x float> @test_4xfloat_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_masked_unpack_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
- ret <4 x float> %res
-}
-define <4 x float> @test_4xfloat_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_masked_unpack_low_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
- ret <4 x float> %res
-}
-define <4 x float> @test_4xfloat_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2) {
-; GENERIC-LABEL: test_4xfloat_unpack_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_unpack_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- ret <4 x float> %res
-}
-define <4 x float> @test_4xfloat_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_masked_unpack_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
- ret <4 x float> %res
-}
-define <4 x float> @test_4xfloat_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p) {
-; GENERIC-LABEL: test_4xfloat_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- ret <4 x float> %res
-}
-define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_masked_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_masked_unpack_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_masked_unpack_low_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p) {
-; GENERIC-LABEL: test_4xfloat_unpack_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_unpack_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- ret <4 x float> %res
-}
-define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_masked_unpack_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_masked_unpack_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
- ret <4 x float> %res
-}
-
-define <8 x float> @test_8xfloat_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2) {
-; GENERIC-LABEL: test_8xfloat_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_unpack_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_unpack_low_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2) {
-; GENERIC-LABEL: test_8xfloat_unpack_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_unpack_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_unpack_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) {
-; GENERIC-LABEL: test_8xfloat_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_unpack_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_unpack_low_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) {
-; GENERIC-LABEL: test_8xfloat_unpack_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_unpack_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_unpack_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_unpack_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-
-define <16 x float> @test_16xfloat_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2) {
-; GENERIC-LABEL: test_16xfloat_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_unpack_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_unpack_low_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2) {
-; GENERIC-LABEL: test_16xfloat_unpack_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_unpack_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_unpack_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
-; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_unpack_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_unpack_low_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
-; GENERIC-LABEL: test_16xfloat_unpack_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_unpack_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_unpack_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_unpack_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-
-define <2 x double> @test_2xdouble_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2) {
-; GENERIC-LABEL: test_2xdouble_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
- ret <2 x double> %res
-}
-define <2 x double> @test_2xdouble_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) {
-; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_masked_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
- %cmp = icmp eq <2 x i64> %mask, zeroinitializer
- %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
- ret <2 x double> %res
-}
-
-define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) {
-; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
- %cmp = icmp eq <2 x i64> %mask, zeroinitializer
- %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
- ret <2 x double> %res
-}
-define <2 x double> @test_2xdouble_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) {
-; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_masked_unpack_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
- %cmp = icmp eq <2 x i64> %mask, zeroinitializer
- %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
- ret <2 x double> %res
-}
-
-define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) {
-; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
- %cmp = icmp eq <2 x i64> %mask, zeroinitializer
- %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
- ret <2 x double> %res
-}
-define <2 x double> @test_2xdouble_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) {
-; GENERIC-LABEL: test_2xdouble_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <2 x double>, <2 x double>* %vec2p
- %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
- ret <2 x double> %res
-}
-define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) {
-; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00]
-; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_masked_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00]
-; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <2 x double>, <2 x double>* %vec2p
- %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
- %cmp = icmp eq <2 x i64> %mask, zeroinitializer
- %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
- ret <2 x double> %res
-}
-
-define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) {
-; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <2 x double>, <2 x double>* %vec2p
- %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
- %cmp = icmp eq <2 x i64> %mask, zeroinitializer
- %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
- ret <2 x double> %res
-}
-
-define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) {
-; GENERIC-LABEL: test_2xdouble_masked_unpack_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00]
-; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_masked_unpack_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0] sched: [7:1.00]
-; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <2 x double>, <2 x double>* %vec2p
- %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
- %cmp = icmp eq <2 x i64> %mask, zeroinitializer
- %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
- ret <2 x double> %res
-}
-
-define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) {
-; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <2 x double>, <2 x double>* %vec2p
- %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
- %cmp = icmp eq <2 x i64> %mask, zeroinitializer
- %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
- ret <2 x double> %res
-}
-
-define <4 x double> @test_4xdouble_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2) {
-; GENERIC-LABEL: test_4xdouble_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_unpack_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_unpack_low_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2) {
-; GENERIC-LABEL: test_4xdouble_unpack_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_unpack_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_unpack_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) {
-; GENERIC-LABEL: test_4xdouble_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_unpack_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_unpack_low_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) {
-; GENERIC-LABEL: test_4xdouble_unpack_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_unpack_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_unpack_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_unpack_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-
-define <8 x double> @test_8xdouble_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2) {
-; GENERIC-LABEL: test_8xdouble_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_unpack_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_unpack_low_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2) {
-; GENERIC-LABEL: test_8xdouble_unpack_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_unpack_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_unpack_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
-; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_unpack_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_unpack_low_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
-; GENERIC-LABEL: test_8xdouble_unpack_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_unpack_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_unpack_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_unpack_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <4 x float> @test_4xfloat_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2) {
-; GENERIC-LABEL: test_4xfloat_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- ret <4 x float> %res
-}
-define <4 x float> @test_4xfloat_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_masked_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
- ret <4 x float> %res
-}
-define <4 x float> @test_4xfloat_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_masked_unpack_high_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
- ret <4 x float> %res
-}
-define <4 x float> @test_4xfloat_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_masked_unpack_high_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
- ret <4 x float> %res
-}
-define <4 x float> @test_4xfloat_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2) {
-; GENERIC-LABEL: test_4xfloat_unpack_high_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_unpack_high_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- ret <4 x float> %res
-}
-define <4 x float> @test_4xfloat_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_masked_unpack_high_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm3, %xmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
- ret <4 x float> %res
-}
-define <4 x float> @test_4xfloat_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p) {
-; GENERIC-LABEL: test_4xfloat_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- ret <4 x float> %res
-}
-define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_masked_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_masked_unpack_high_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_masked_unpack_high_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p) {
-; GENERIC-LABEL: test_4xfloat_unpack_high_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_unpack_high_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- ret <4 x float> %res
-}
-define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_masked_unpack_high_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; GENERIC-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_masked_unpack_high_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
- ret <4 x float> %res
-}
-
-define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x i32> %mask) {
-; GENERIC-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x float>, <4 x float>* %vec2p
- %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %cmp = icmp eq <4 x i32> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
- ret <4 x float> %res
-}
-
-define <8 x float> @test_8xfloat_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2) {
-; GENERIC-LABEL: test_8xfloat_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_unpack_high_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_unpack_high_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2) {
-; GENERIC-LABEL: test_8xfloat_unpack_high_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_unpack_high_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_unpack_high_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; SKX-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) {
-; GENERIC-LABEL: test_8xfloat_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_unpack_high_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_unpack_high_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) {
-; GENERIC-LABEL: test_8xfloat_unpack_high_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_unpack_high_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- ret <8 x float> %res
-}
-define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_masked_unpack_high_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_masked_unpack_high_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
- ret <8 x float> %res
-}
-
-define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x i32> %mask) {
-; GENERIC-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x float>, <8 x float>* %vec2p
- %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
- %cmp = icmp eq <8 x i32> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
- ret <8 x float> %res
-}
-
-define <16 x float> @test_16xfloat_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2) {
-; GENERIC-LABEL: test_16xfloat_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_unpack_high_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_unpack_high_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2) {
-; GENERIC-LABEL: test_16xfloat_unpack_high_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_unpack_high_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; GENERIC-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_unpack_high_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; SKX-NEXT: vmovaps %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
-; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_unpack_high_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_unpack_high_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
-; GENERIC-LABEL: test_16xfloat_unpack_high_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_unpack_high_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- ret <16 x float> %res
-}
-define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_masked_unpack_high_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; GENERIC-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_masked_unpack_high_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
- ret <16 x float> %res
-}
-
-define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x i32> %mask) {
-; GENERIC-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmd %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <16 x float>, <16 x float>* %vec2p
- %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
- %cmp = icmp eq <16 x i32> %mask, zeroinitializer
- %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
- ret <16 x float> %res
-}
-
-define <2 x double> @test_2xdouble_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2) {
-; GENERIC-LABEL: test_2xdouble_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
- ret <2 x double> %res
-}
-define <2 x double> @test_2xdouble_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) {
-; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_masked_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
- %cmp = icmp eq <2 x i64> %mask, zeroinitializer
- %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
- ret <2 x double> %res
-}
-
-define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) {
-; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
- %cmp = icmp eq <2 x i64> %mask, zeroinitializer
- %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
- ret <2 x double> %res
-}
-define <2 x double> @test_2xdouble_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x i64> %mask) {
-; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_masked_unpack_high_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm3, %xmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
- %cmp = icmp eq <2 x i64> %mask, zeroinitializer
- %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
- ret <2 x double> %res
-}
-
-define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x i64> %mask) {
-; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
- %cmp = icmp eq <2 x i64> %mask, zeroinitializer
- %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
- ret <2 x double> %res
-}
-define <2 x double> @test_2xdouble_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) {
-; GENERIC-LABEL: test_2xdouble_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <2 x double>, <2 x double>* %vec2p
- %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
- ret <2 x double> %res
-}
-define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) {
-; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_masked_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <2 x double>, <2 x double>* %vec2p
- %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
- %cmp = icmp eq <2 x i64> %mask, zeroinitializer
- %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
- ret <2 x double> %res
-}
-
-define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) {
-; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <2 x double>, <2 x double>* %vec2p
- %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
- %cmp = icmp eq <2 x i64> %mask, zeroinitializer
- %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
- ret <2 x double> %res
-}
-
-define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x i64> %mask) {
-; GENERIC-LABEL: test_2xdouble_masked_unpack_high_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_masked_unpack_high_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm2, %xmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <2 x double>, <2 x double>* %vec2p
- %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
- %cmp = icmp eq <2 x i64> %mask, zeroinitializer
- %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
- ret <2 x double> %res
-}
-
-define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x i64> %mask) {
-; GENERIC-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %xmm1, %xmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <2 x double>, <2 x double>* %vec2p
- %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
- %cmp = icmp eq <2 x i64> %mask, zeroinitializer
- %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
- ret <2 x double> %res
-}
-
-define <4 x double> @test_4xdouble_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2) {
-; GENERIC-LABEL: test_4xdouble_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_unpack_high_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_unpack_high_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2) {
-; GENERIC-LABEL: test_4xdouble_unpack_high_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_unpack_high_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_unpack_high_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm3, %ymm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; SKX-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) {
-; GENERIC-LABEL: test_4xdouble_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_unpack_high_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_unpack_high_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) {
-; GENERIC-LABEL: test_4xdouble_unpack_high_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_unpack_high_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- ret <4 x double> %res
-}
-define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_masked_unpack_high_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_masked_unpack_high_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm2, %ymm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; SKX-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
- ret <4 x double> %res
-}
-
-define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x i64> %mask) {
-; GENERIC-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %ymm1, %ymm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <4 x double>, <4 x double>* %vec2p
- %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
- %cmp = icmp eq <4 x i64> %mask, zeroinitializer
- %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
- ret <4 x double> %res
-}
-
-define <8 x double> @test_8xdouble_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2) {
-; GENERIC-LABEL: test_8xdouble_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_unpack_high_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_unpack_high_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2) {
-; GENERIC-LABEL: test_8xdouble_unpack_high_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_unpack_high_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; GENERIC-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_unpack_high_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm3, %zmm3, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; SKX-NEXT: vmovapd %zmm2, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
-; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_unpack_high_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_unpack_high_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
-; GENERIC-LABEL: test_8xdouble_unpack_high_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_unpack_high_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- ret <8 x double> %res
-}
-define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_masked_unpack_high_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_masked_unpack_high_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm2, %zmm2, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: vmovapd %zmm1, %zmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
- ret <8 x double> %res
-}
-
-define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x i64> %mask) {
-; GENERIC-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
-; SKX: # %bb.0:
-; SKX-NEXT: vptestnmq %zmm1, %zmm1, %k1 # sched: [3:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7] sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
- %vec2 = load <8 x double>, <8 x double>* %vec2p
- %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %cmp = icmp eq <8 x i64> %mask, zeroinitializer
- %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
- ret <8 x double> %res
-}
-
diff --git a/llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll b/llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll
deleted file mode 100644
index f44374e802d..00000000000
--- a/llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll
+++ /dev/null
@@ -1,80 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=icelake-client | FileCheck %s --check-prefix=ICELAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=icelake-server | FileCheck %s --check-prefix=ICELAKE
-
-define void @test_vpopcntd(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> *%a2, i16 %a3) {
-; GENERIC-LABEL: test_vpopcntd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovw %esi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:0.50]
-; GENERIC-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
-; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:0.50]
-; GENERIC-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
-; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:0.50]
-; GENERIC-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ICELAKE-LABEL: test_vpopcntd:
-; ICELAKE: # %bb.0:
-; ICELAKE-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
-; ICELAKE-NEXT: #APP
-; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:1.00]
-; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:1.00]
-; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
-; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:1.00]
-; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:1.00]
-; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
-; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:1.00]
-; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:1.00]
-; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:1.00]
-; ICELAKE-NEXT: #NO_APP
-; ICELAKE-NEXT: vzeroupper # sched: [0:0.67]
-; ICELAKE-NEXT: retq # sched: [7:1.00]
- tail call void asm "vpopcntd $1, $0 \0A\09 vpopcntd $1, $0 {$3} \0A\09 vpopcntd $1, $0 {$3} {z} \0A\09 vpopcntd $2, $0 \0A\09 vpopcntd $2, $0 {$3} \0A\09 vpopcntd $2, $0 {$3} {z} \0A\09 vpopcntd $2{1to16}, $0 \0A\09 vpopcntd $2{1to16}, $0 {$3} \0A\09 vpopcntd $2{1to16}, $0 {$3} {z}", "v,v,*m,^Yk"(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> *%a2, i16 %a3) nounwind
- ret void
-}
-
-define void @test_vpopcntq(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> *%a2, i8 %a3) {
-; GENERIC-LABEL: test_vpopcntq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: kmovw %esi, %k1 # sched: [1:0.33]
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:0.50]
-; GENERIC-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50]
-; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:0.50]
-; GENERIC-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50]
-; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:0.50]
-; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:0.50]
-; GENERIC-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ICELAKE-LABEL: test_vpopcntq:
-; ICELAKE: # %bb.0:
-; ICELAKE-NEXT: kmovd %esi, %k1 # sched: [1:1.00]
-; ICELAKE-NEXT: #APP
-; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:1.00]
-; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:1.00]
-; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00]
-; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:1.00]
-; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:1.00]
-; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00]
-; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:1.00]
-; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:1.00]
-; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:1.00]
-; ICELAKE-NEXT: #NO_APP
-; ICELAKE-NEXT: vzeroupper # sched: [0:0.67]
-; ICELAKE-NEXT: retq # sched: [7:1.00]
- tail call void asm "vpopcntq $1, $0 \0A\09 vpopcntq $1, $0 {$3} \0A\09 vpopcntq $1, $0 {$3} {z} \0A\09 vpopcntq $2, $0 \0A\09 vpopcntq $2, $0 {$3} \0A\09 vpopcntq $2, $0 {$3} {z} \0A\09 vpopcntq $2{1to8}, $0 \0A\09 vpopcntq $2{1to8}, $0 {$3} \0A\09 vpopcntq $2{1to8}, $0 {$3} {z}", "v,v,*m,^Yk"(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> *%a2, i8 %a3) nounwind
- ret void
-}
diff --git a/llvm/test/CodeGen/X86/bmi-schedule.ll b/llvm/test/CodeGen/X86/bmi-schedule.ll
deleted file mode 100644
index 5b5b388c100..00000000000
--- a/llvm/test/CodeGen/X86/bmi-schedule.ll
+++ /dev/null
@@ -1,763 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define i32 @test_andn_i32(i32 %a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: test_andn_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: andnl (%rdx), %edi, %eax # sched: [6:0.50]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_andn_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50]
-; HASWELL-NEXT: andnl (%rdx), %edi, %eax # sched: [6:0.50]
-; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_andn_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50]
-; BROADWELL-NEXT: andnl (%rdx), %edi, %eax # sched: [6:0.50]
-; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_andn_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50]
-; SKYLAKE-NEXT: andnl (%rdx), %edi, %eax # sched: [6:0.50]
-; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_andn_i32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: andnl (%rdx), %edi, %eax # sched: [5:0.50]
-; BDVER2-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_andn_i32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: andnl (%rdx), %edi, %eax # sched: [4:1.00]
-; BTVER2-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.50]
-; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_andn_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: andnl (%rdx), %edi, %eax # sched: [5:0.50]
-; ZNVER1-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.25]
-; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i32, i32 *%a2
- %2 = xor i32 %a0, -1
- %3 = and i32 %2, %a1
- %4 = and i32 %2, %1
- %5 = add i32 %3, %4
- ret i32 %5
-}
-
-define i64 @test_andn_i64(i64 %a0, i64 %a1, i64 *%a2) {
-; GENERIC-LABEL: test_andn_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: andnq (%rdx), %rdi, %rax # sched: [6:0.50]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_andn_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50]
-; HASWELL-NEXT: andnq (%rdx), %rdi, %rax # sched: [6:0.50]
-; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_andn_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50]
-; BROADWELL-NEXT: andnq (%rdx), %rdi, %rax # sched: [6:0.50]
-; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_andn_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50]
-; SKYLAKE-NEXT: andnq (%rdx), %rdi, %rax # sched: [6:0.50]
-; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_andn_i64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: andnq (%rdx), %rdi, %rax # sched: [5:0.50]
-; BDVER2-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_andn_i64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: andnq (%rdx), %rdi, %rax # sched: [4:1.00]
-; BTVER2-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.50]
-; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_andn_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: andnq (%rdx), %rdi, %rax # sched: [5:0.50]
-; ZNVER1-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.25]
-; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a2
- %2 = xor i64 %a0, -1
- %3 = and i64 %2, %a1
- %4 = and i64 %2, %1
- %5 = add i64 %3, %4
- ret i64 %5
-}
-
-define i32 @test_bextr_i32(i32 %a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: test_bextr_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: bextrl %edi, (%rdx), %ecx # sched: [7:1.00]
-; GENERIC-NEXT: bextrl %edi, %esi, %eax # sched: [2:1.00]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bextr_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: bextrl %edi, (%rdx), %ecx # sched: [7:0.50]
-; HASWELL-NEXT: bextrl %edi, %esi, %eax # sched: [2:0.50]
-; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bextr_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: bextrl %edi, (%rdx), %ecx # sched: [7:0.50]
-; BROADWELL-NEXT: bextrl %edi, %esi, %eax # sched: [2:0.50]
-; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bextr_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: bextrl %edi, (%rdx), %ecx # sched: [7:0.50]
-; SKYLAKE-NEXT: bextrl %edi, %esi, %eax # sched: [2:0.50]
-; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_bextr_i32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: bextrl %edi, (%rdx), %ecx # sched: [6:0.50]
-; BDVER2-NEXT: bextrl %edi, %esi, %eax # sched: [2:0.50]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_bextr_i32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: bextrl %edi, (%rdx), %ecx # sched: [4:1.00]
-; BTVER2-NEXT: bextrl %edi, %esi, %eax # sched: [1:0.50]
-; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bextr_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: bextrl %edi, (%rdx), %ecx # sched: [5:0.50]
-; ZNVER1-NEXT: bextrl %edi, %esi, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i32, i32 *%a2
- %2 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %1, i32 %a0)
- %3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a1, i32 %a0)
- %4 = add i32 %2, %3
- ret i32 %4
-}
-declare i32 @llvm.x86.bmi.bextr.32(i32, i32)
-
-define i64 @test_bextr_i64(i64 %a0, i64 %a1, i64 *%a2) {
-; GENERIC-LABEL: test_bextr_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [7:1.00]
-; GENERIC-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:1.00]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bextr_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [7:0.50]
-; HASWELL-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:0.50]
-; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bextr_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [7:0.50]
-; BROADWELL-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:0.50]
-; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bextr_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [7:0.50]
-; SKYLAKE-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:0.50]
-; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_bextr_i64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [6:0.50]
-; BDVER2-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:0.50]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_bextr_i64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [4:1.00]
-; BTVER2-NEXT: bextrq %rdi, %rsi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bextr_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [5:0.50]
-; ZNVER1-NEXT: bextrq %rdi, %rsi, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a2
- %2 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %1, i64 %a0)
- %3 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a1, i64 %a0)
- %4 = add i64 %2, %3
- ret i64 %4
-}
-declare i64 @llvm.x86.bmi.bextr.64(i64, i64)
-
-define i32 @test_blsi_i32(i32 %a0, i32 *%a1) {
-; GENERIC-LABEL: test_blsi_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blsil (%rsi), %ecx # sched: [6:0.50]
-; GENERIC-NEXT: blsil %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_blsi_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: blsil (%rsi), %ecx # sched: [6:0.50]
-; HASWELL-NEXT: blsil %edi, %eax # sched: [1:0.50]
-; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blsi_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: blsil (%rsi), %ecx # sched: [6:0.50]
-; BROADWELL-NEXT: blsil %edi, %eax # sched: [1:0.50]
-; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blsi_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: blsil (%rsi), %ecx # sched: [6:0.50]
-; SKYLAKE-NEXT: blsil %edi, %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_blsi_i32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blsil (%rsi), %ecx # sched: [6:0.50]
-; BDVER2-NEXT: blsil %edi, %eax # sched: [2:0.50]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_blsi_i32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: blsil (%rsi), %ecx # sched: [5:1.00]
-; BTVER2-NEXT: blsil %edi, %eax # sched: [2:1.00]
-; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_blsi_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: blsil (%rsi), %ecx # sched: [6:0.50]
-; ZNVER1-NEXT: blsil %edi, %eax # sched: [2:0.25]
-; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i32, i32 *%a1
- %2 = sub i32 0, %1
- %3 = sub i32 0, %a0
- %4 = and i32 %1, %2
- %5 = and i32 %a0, %3
- %6 = add i32 %4, %5
- ret i32 %6
-}
-
-define i64 @test_blsi_i64(i64 %a0, i64 *%a1) {
-; GENERIC-LABEL: test_blsi_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50]
-; GENERIC-NEXT: blsiq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_blsi_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50]
-; HASWELL-NEXT: blsiq %rdi, %rax # sched: [1:0.50]
-; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blsi_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50]
-; BROADWELL-NEXT: blsiq %rdi, %rax # sched: [1:0.50]
-; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blsi_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50]
-; SKYLAKE-NEXT: blsiq %rdi, %rax # sched: [1:0.50]
-; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_blsi_i64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50]
-; BDVER2-NEXT: blsiq %rdi, %rax # sched: [2:0.50]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_blsi_i64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: blsiq (%rsi), %rcx # sched: [5:1.00]
-; BTVER2-NEXT: blsiq %rdi, %rax # sched: [2:1.00]
-; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_blsi_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50]
-; ZNVER1-NEXT: blsiq %rdi, %rax # sched: [2:0.25]
-; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a1
- %2 = sub i64 0, %1
- %3 = sub i64 0, %a0
- %4 = and i64 %1, %2
- %5 = and i64 %a0, %3
- %6 = add i64 %4, %5
- ret i64 %6
-}
-
-define i32 @test_blsmsk_i32(i32 %a0, i32 *%a1) {
-; GENERIC-LABEL: test_blsmsk_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50]
-; GENERIC-NEXT: blsmskl %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_blsmsk_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50]
-; HASWELL-NEXT: blsmskl %edi, %eax # sched: [1:0.50]
-; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blsmsk_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50]
-; BROADWELL-NEXT: blsmskl %edi, %eax # sched: [1:0.50]
-; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blsmsk_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50]
-; SKYLAKE-NEXT: blsmskl %edi, %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_blsmsk_i32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50]
-; BDVER2-NEXT: blsmskl %edi, %eax # sched: [2:0.50]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_blsmsk_i32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: blsmskl (%rsi), %ecx # sched: [5:1.00]
-; BTVER2-NEXT: blsmskl %edi, %eax # sched: [2:1.00]
-; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_blsmsk_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50]
-; ZNVER1-NEXT: blsmskl %edi, %eax # sched: [2:0.25]
-; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i32, i32 *%a1
- %2 = sub i32 %1, 1
- %3 = sub i32 %a0, 1
- %4 = xor i32 %1, %2
- %5 = xor i32 %a0, %3
- %6 = add i32 %4, %5
- ret i32 %6
-}
-
-define i64 @test_blsmsk_i64(i64 %a0, i64 *%a1) {
-; GENERIC-LABEL: test_blsmsk_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50]
-; GENERIC-NEXT: blsmskq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_blsmsk_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50]
-; HASWELL-NEXT: blsmskq %rdi, %rax # sched: [1:0.50]
-; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blsmsk_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50]
-; BROADWELL-NEXT: blsmskq %rdi, %rax # sched: [1:0.50]
-; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blsmsk_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50]
-; SKYLAKE-NEXT: blsmskq %rdi, %rax # sched: [1:0.50]
-; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_blsmsk_i64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50]
-; BDVER2-NEXT: blsmskq %rdi, %rax # sched: [2:0.50]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_blsmsk_i64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: blsmskq (%rsi), %rcx # sched: [5:1.00]
-; BTVER2-NEXT: blsmskq %rdi, %rax # sched: [2:1.00]
-; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_blsmsk_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50]
-; ZNVER1-NEXT: blsmskq %rdi, %rax # sched: [2:0.25]
-; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a1
- %2 = sub i64 %1, 1
- %3 = sub i64 %a0, 1
- %4 = xor i64 %1, %2
- %5 = xor i64 %a0, %3
- %6 = add i64 %4, %5
- ret i64 %6
-}
-
-define i32 @test_blsr_i32(i32 %a0, i32 *%a1) {
-; GENERIC-LABEL: test_blsr_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50]
-; GENERIC-NEXT: blsrl %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_blsr_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50]
-; HASWELL-NEXT: blsrl %edi, %eax # sched: [1:0.50]
-; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blsr_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50]
-; BROADWELL-NEXT: blsrl %edi, %eax # sched: [1:0.50]
-; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blsr_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50]
-; SKYLAKE-NEXT: blsrl %edi, %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_blsr_i32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50]
-; BDVER2-NEXT: blsrl %edi, %eax # sched: [2:0.50]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_blsr_i32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: blsrl (%rsi), %ecx # sched: [5:1.00]
-; BTVER2-NEXT: blsrl %edi, %eax # sched: [2:1.00]
-; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_blsr_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50]
-; ZNVER1-NEXT: blsrl %edi, %eax # sched: [2:0.25]
-; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i32, i32 *%a1
- %2 = sub i32 %1, 1
- %3 = sub i32 %a0, 1
- %4 = and i32 %1, %2
- %5 = and i32 %a0, %3
- %6 = add i32 %4, %5
- ret i32 %6
-}
-
-define i64 @test_blsr_i64(i64 %a0, i64 *%a1) {
-; GENERIC-LABEL: test_blsr_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50]
-; GENERIC-NEXT: blsrq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_blsr_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50]
-; HASWELL-NEXT: blsrq %rdi, %rax # sched: [1:0.50]
-; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blsr_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50]
-; BROADWELL-NEXT: blsrq %rdi, %rax # sched: [1:0.50]
-; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blsr_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50]
-; SKYLAKE-NEXT: blsrq %rdi, %rax # sched: [1:0.50]
-; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_blsr_i64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50]
-; BDVER2-NEXT: blsrq %rdi, %rax # sched: [2:0.50]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_blsr_i64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: blsrq (%rsi), %rcx # sched: [5:1.00]
-; BTVER2-NEXT: blsrq %rdi, %rax # sched: [2:1.00]
-; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_blsr_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50]
-; ZNVER1-NEXT: blsrq %rdi, %rax # sched: [2:0.25]
-; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a1
- %2 = sub i64 %1, 1
- %3 = sub i64 %a0, 1
- %4 = and i64 %1, %2
- %5 = and i64 %a0, %3
- %6 = add i64 %4, %5
- ret i64 %6
-}
-
-define i16 @test_cttz_i16(i16 zeroext %a0, i16 *%a1) {
-; GENERIC-LABEL: test_cttz_i16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: tzcntw (%rsi), %cx # sched: [8:1.00]
-; GENERIC-NEXT: tzcntw %di, %ax # sched: [3:1.00]
-; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cttz_i16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: tzcntw (%rsi), %cx # sched: [8:1.00]
-; HASWELL-NEXT: tzcntw %di, %ax # sched: [3:1.00]
-; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cttz_i16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: tzcntw (%rsi), %cx # sched: [8:1.00]
-; BROADWELL-NEXT: tzcntw %di, %ax # sched: [3:1.00]
-; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cttz_i16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: tzcntw (%rsi), %cx # sched: [8:1.00]
-; SKYLAKE-NEXT: tzcntw %di, %ax # sched: [3:1.00]
-; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cttz_i16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: tzcntw (%rsi), %cx # sched: [6:1.00]
-; BDVER2-NEXT: tzcntw %di, %ax # sched: [2:1.00]
-; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cttz_i16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: tzcntw (%rsi), %cx # sched: [5:1.00]
-; BTVER2-NEXT: tzcntw %di, %ax # sched: [2:1.00]
-; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cttz_i16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: tzcntw (%rsi), %cx # sched: [6:0.50]
-; ZNVER1-NEXT: tzcntw %di, %ax # sched: [2:0.25]
-; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: # kill: def $ax killed $ax killed $eax
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i16, i16 *%a1
- %2 = tail call i16 @llvm.cttz.i16( i16 %1, i1 false )
- %3 = tail call i16 @llvm.cttz.i16( i16 %a0, i1 false )
- %4 = or i16 %2, %3
- ret i16 %4
-}
-declare i16 @llvm.cttz.i16(i16, i1)
-
-define i32 @test_cttz_i32(i32 %a0, i32 *%a1) {
-; GENERIC-LABEL: test_cttz_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: tzcntl (%rsi), %ecx # sched: [8:1.00]
-; GENERIC-NEXT: tzcntl %edi, %eax # sched: [3:1.00]
-; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cttz_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: tzcntl (%rsi), %ecx # sched: [8:1.00]
-; HASWELL-NEXT: tzcntl %edi, %eax # sched: [3:1.00]
-; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cttz_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: tzcntl (%rsi), %ecx # sched: [8:1.00]
-; BROADWELL-NEXT: tzcntl %edi, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cttz_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: tzcntl (%rsi), %ecx # sched: [8:1.00]
-; SKYLAKE-NEXT: tzcntl %edi, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cttz_i32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: tzcntl (%rsi), %ecx # sched: [6:1.00]
-; BDVER2-NEXT: tzcntl %edi, %eax # sched: [2:1.00]
-; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cttz_i32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: tzcntl (%rsi), %ecx # sched: [5:1.00]
-; BTVER2-NEXT: tzcntl %edi, %eax # sched: [2:1.00]
-; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cttz_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: tzcntl (%rsi), %ecx # sched: [6:0.50]
-; ZNVER1-NEXT: tzcntl %edi, %eax # sched: [2:0.25]
-; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i32, i32 *%a1
- %2 = tail call i32 @llvm.cttz.i32( i32 %1, i1 false )
- %3 = tail call i32 @llvm.cttz.i32( i32 %a0, i1 false )
- %4 = or i32 %2, %3
- ret i32 %4
-}
-declare i32 @llvm.cttz.i32(i32, i1)
-
-define i64 @test_cttz_i64(i64 %a0, i64 *%a1) {
-; GENERIC-LABEL: test_cttz_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: tzcntq (%rsi), %rcx # sched: [8:1.00]
-; GENERIC-NEXT: tzcntq %rdi, %rax # sched: [3:1.00]
-; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cttz_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: tzcntq (%rsi), %rcx # sched: [8:1.00]
-; HASWELL-NEXT: tzcntq %rdi, %rax # sched: [3:1.00]
-; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cttz_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: tzcntq (%rsi), %rcx # sched: [8:1.00]
-; BROADWELL-NEXT: tzcntq %rdi, %rax # sched: [3:1.00]
-; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cttz_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: tzcntq (%rsi), %rcx # sched: [8:1.00]
-; SKYLAKE-NEXT: tzcntq %rdi, %rax # sched: [3:1.00]
-; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cttz_i64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: tzcntq (%rsi), %rcx # sched: [6:1.00]
-; BDVER2-NEXT: tzcntq %rdi, %rax # sched: [2:1.00]
-; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cttz_i64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: tzcntq (%rsi), %rcx # sched: [5:1.00]
-; BTVER2-NEXT: tzcntq %rdi, %rax # sched: [2:1.00]
-; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cttz_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: tzcntq (%rsi), %rcx # sched: [6:0.50]
-; ZNVER1-NEXT: tzcntq %rdi, %rax # sched: [2:0.25]
-; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a1
- %2 = tail call i64 @llvm.cttz.i64( i64 %1, i1 false )
- %3 = tail call i64 @llvm.cttz.i64( i64 %a0, i1 false )
- %4 = or i64 %2, %3
- ret i64 %4
-}
-declare i64 @llvm.cttz.i64(i64, i1)
diff --git a/llvm/test/CodeGen/X86/bmi2-schedule.ll b/llvm/test/CodeGen/X86/bmi2-schedule.ll
deleted file mode 100644
index f235e793ab9..00000000000
--- a/llvm/test/CodeGen/X86/bmi2-schedule.ll
+++ /dev/null
@@ -1,811 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define i32 @test_bzhi_i32(i32 %a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: test_bzhi_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: bzhil %edi, (%rdx), %ecx # sched: [6:1.00]
-; GENERIC-NEXT: bzhil %edi, %esi, %eax # sched: [1:1.00]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bzhi_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: bzhil %edi, (%rdx), %ecx # sched: [6:0.50]
-; HASWELL-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.50]
-; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bzhi_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: bzhil %edi, (%rdx), %ecx # sched: [6:0.50]
-; BROADWELL-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.50]
-; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bzhi_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: bzhil %edi, (%rdx), %ecx # sched: [6:0.50]
-; SKYLAKE-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_bzhi_i32:
-; KNL: # %bb.0:
-; KNL-NEXT: bzhil %edi, (%rdx), %ecx # sched: [6:0.50]
-; KNL-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.50]
-; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_bzhi_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: bzhil %edi, (%rdx), %ecx # sched: [5:0.50]
-; ZNVER1-NEXT: bzhil %edi, %esi, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i32, i32 *%a2
- %2 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %1, i32 %a0)
- %3 = tail call i32 @llvm.x86.bmi.bzhi.32(i32 %a1, i32 %a0)
- %4 = add i32 %2, %3
- ret i32 %4
-}
-declare i32 @llvm.x86.bmi.bzhi.32(i32, i32)
-
-define i64 @test_bzhi_i64(i64 %a0, i64 %a1, i64 *%a2) {
-; GENERIC-LABEL: test_bzhi_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [6:1.00]
-; GENERIC-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:1.00]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bzhi_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [6:0.50]
-; HASWELL-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.50]
-; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bzhi_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [6:0.50]
-; BROADWELL-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.50]
-; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bzhi_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [6:0.50]
-; SKYLAKE-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.50]
-; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_bzhi_i64:
-; KNL: # %bb.0:
-; KNL-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [6:0.50]
-; KNL-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.50]
-; KNL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_bzhi_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: bzhiq %rdi, (%rdx), %rcx # sched: [5:0.50]
-; ZNVER1-NEXT: bzhiq %rdi, %rsi, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a2
- %2 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %1, i64 %a0)
- %3 = tail call i64 @llvm.x86.bmi.bzhi.64(i64 %a1, i64 %a0)
- %4 = add i64 %2, %3
- ret i64 %4
-}
-declare i64 @llvm.x86.bmi.bzhi.64(i64, i64)
-
-define void @test_mulx_i32(i32 %a0, i32 %a1, i32* %a2) optsize {
-; GENERIC-LABEL: test_mulx_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00]
-; GENERIC-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_mulx_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00]
-; HASWELL-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mulx_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00]
-; BROADWELL-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mulx_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00]
-; SKYLAKE-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_mulx_i32:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00]
-; KNL-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_mulx_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: mulxl %esi, %esi, %edi # sched: [3:2.00]
-; ZNVER1-NEXT: mulxl (%rdx), %esi, %edi # sched: [8:2.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "mulx $1, $1, $0 \0A\09 mulx $2, $1, $0 ", "r,r,*m"(i32 %a0, i32 %a1, i32* %a2) nounwind
- ret void
-}
-
-define void @test_mulx_i64(i64 %a0, i64 %a1, i64 *%a2) {
-; GENERIC-LABEL: test_mulx_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00]
-; GENERIC-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_mulx_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00]
-; HASWELL-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mulx_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00]
-; BROADWELL-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mulx_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00]
-; SKYLAKE-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_mulx_i64:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: mulxq %rsi, %rsi, %rdi # sched: [4:1.00]
-; KNL-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [9:1.00]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_mulx_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: mulxq %rsi, %rsi, %rdi # sched: [3:1.00]
-; ZNVER1-NEXT: mulxq (%rdx), %rsi, %rdi # sched: [8:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "mulx $1, $1, $0 \0A\09 mulx $2, $1, $0 ", "r,r,*m"(i64 %a0, i64 %a1, i64* %a2) nounwind
- ret void
-}
-
-define i32 @test_pdep_i32(i32 %a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: test_pdep_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pdepl (%rdx), %edi, %ecx # sched: [6:0.50]
-; GENERIC-NEXT: pdepl %esi, %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pdep_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pdepl (%rdx), %edi, %ecx # sched: [8:1.00]
-; HASWELL-NEXT: pdepl %esi, %edi, %eax # sched: [3:1.00]
-; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pdep_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pdepl (%rdx), %edi, %ecx # sched: [8:1.00]
-; BROADWELL-NEXT: pdepl %esi, %edi, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pdep_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pdepl (%rdx), %edi, %ecx # sched: [8:1.00]
-; SKYLAKE-NEXT: pdepl %esi, %edi, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_pdep_i32:
-; KNL: # %bb.0:
-; KNL-NEXT: pdepl (%rdx), %edi, %ecx # sched: [8:1.00]
-; KNL-NEXT: pdepl %esi, %edi, %eax # sched: [3:1.00]
-; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pdep_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pdepl (%rdx), %edi, %ecx # sched: [100:0.25]
-; ZNVER1-NEXT: pdepl %esi, %edi, %eax # sched: [100:0.25]
-; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i32, i32 *%a2
- %2 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %a0, i32 %1)
- %3 = tail call i32 @llvm.x86.bmi.pdep.32(i32 %a0, i32 %a1)
- %4 = add i32 %2, %3
- ret i32 %4
-}
-declare i32 @llvm.x86.bmi.pdep.32(i32, i32)
-
-define i64 @test_pdep_i64(i64 %a0, i64 %a1, i64 *%a2) {
-; GENERIC-LABEL: test_pdep_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [6:0.50]
-; GENERIC-NEXT: pdepq %rsi, %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pdep_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [8:1.00]
-; HASWELL-NEXT: pdepq %rsi, %rdi, %rax # sched: [3:1.00]
-; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pdep_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [8:1.00]
-; BROADWELL-NEXT: pdepq %rsi, %rdi, %rax # sched: [3:1.00]
-; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pdep_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [8:1.00]
-; SKYLAKE-NEXT: pdepq %rsi, %rdi, %rax # sched: [3:1.00]
-; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_pdep_i64:
-; KNL: # %bb.0:
-; KNL-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [8:1.00]
-; KNL-NEXT: pdepq %rsi, %rdi, %rax # sched: [3:1.00]
-; KNL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pdep_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pdepq (%rdx), %rdi, %rcx # sched: [100:0.25]
-; ZNVER1-NEXT: pdepq %rsi, %rdi, %rax # sched: [100:0.25]
-; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a2
- %2 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %a0, i64 %1)
- %3 = tail call i64 @llvm.x86.bmi.pdep.64(i64 %a0, i64 %a1)
- %4 = add i64 %2, %3
- ret i64 %4
-}
-declare i64 @llvm.x86.bmi.pdep.64(i64, i64)
-
-define i32 @test_pext_i32(i32 %a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: test_pext_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pextl (%rdx), %edi, %ecx # sched: [6:0.50]
-; GENERIC-NEXT: pextl %esi, %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pext_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pextl (%rdx), %edi, %ecx # sched: [8:1.00]
-; HASWELL-NEXT: pextl %esi, %edi, %eax # sched: [3:1.00]
-; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pext_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pextl (%rdx), %edi, %ecx # sched: [8:1.00]
-; BROADWELL-NEXT: pextl %esi, %edi, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pext_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pextl (%rdx), %edi, %ecx # sched: [8:1.00]
-; SKYLAKE-NEXT: pextl %esi, %edi, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_pext_i32:
-; KNL: # %bb.0:
-; KNL-NEXT: pextl (%rdx), %edi, %ecx # sched: [8:1.00]
-; KNL-NEXT: pextl %esi, %edi, %eax # sched: [3:1.00]
-; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pext_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pextl (%rdx), %edi, %ecx # sched: [100:0.25]
-; ZNVER1-NEXT: pextl %esi, %edi, %eax # sched: [100:0.25]
-; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i32, i32 *%a2
- %2 = tail call i32 @llvm.x86.bmi.pext.32(i32 %a0, i32 %1)
- %3 = tail call i32 @llvm.x86.bmi.pext.32(i32 %a0, i32 %a1)
- %4 = add i32 %2, %3
- ret i32 %4
-}
-declare i32 @llvm.x86.bmi.pext.32(i32, i32)
-
-define i64 @test_pext_i64(i64 %a0, i64 %a1, i64 *%a2) {
-; GENERIC-LABEL: test_pext_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pextq (%rdx), %rdi, %rcx # sched: [6:0.50]
-; GENERIC-NEXT: pextq %rsi, %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pext_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pextq (%rdx), %rdi, %rcx # sched: [8:1.00]
-; HASWELL-NEXT: pextq %rsi, %rdi, %rax # sched: [3:1.00]
-; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pext_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pextq (%rdx), %rdi, %rcx # sched: [8:1.00]
-; BROADWELL-NEXT: pextq %rsi, %rdi, %rax # sched: [3:1.00]
-; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pext_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pextq (%rdx), %rdi, %rcx # sched: [8:1.00]
-; SKYLAKE-NEXT: pextq %rsi, %rdi, %rax # sched: [3:1.00]
-; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_pext_i64:
-; KNL: # %bb.0:
-; KNL-NEXT: pextq (%rdx), %rdi, %rcx # sched: [8:1.00]
-; KNL-NEXT: pextq %rsi, %rdi, %rax # sched: [3:1.00]
-; KNL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_pext_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pextq (%rdx), %rdi, %rcx # sched: [100:0.25]
-; ZNVER1-NEXT: pextq %rsi, %rdi, %rax # sched: [100:0.25]
-; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a2
- %2 = tail call i64 @llvm.x86.bmi.pext.64(i64 %a0, i64 %1)
- %3 = tail call i64 @llvm.x86.bmi.pext.64(i64 %a0, i64 %a1)
- %4 = add i64 %2, %3
- ret i64 %4
-}
-declare i64 @llvm.x86.bmi.pext.64(i64, i64)
-
-define i32 @test_rorx_i32(i32 %a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: test_rorx_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50]
-; GENERIC-NEXT: rorxl $5, (%rdx), %eax # sched: [6:0.50]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rorx_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50]
-; HASWELL-NEXT: rorxl $5, (%rdx), %eax # sched: [6:0.50]
-; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rorx_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50]
-; BROADWELL-NEXT: rorxl $5, (%rdx), %eax # sched: [6:0.50]
-; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rorx_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50]
-; SKYLAKE-NEXT: rorxl $5, (%rdx), %eax # sched: [6:0.50]
-; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_rorx_i32:
-; KNL: # %bb.0:
-; KNL-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.50]
-; KNL-NEXT: rorxl $5, (%rdx), %eax # sched: [6:0.50]
-; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_rorx_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: rorxl $5, (%rdx), %eax # sched: [5:0.50]
-; ZNVER1-NEXT: rorxl $5, %edi, %ecx # sched: [1:0.25]
-; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i32, i32 *%a2
- %2 = lshr i32 %a0, 5
- %3 = shl i32 %a0, 27
- %4 = or i32 %2, %3
- %5 = lshr i32 %1, 5
- %6 = shl i32 %1, 27
- %7 = or i32 %5, %6
- %8 = add i32 %4, %7
- ret i32 %8
-}
-
-define i64 @test_rorx_i64(i64 %a0, i64 %a1, i64 *%a2) {
-; GENERIC-LABEL: test_rorx_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50]
-; GENERIC-NEXT: rorxq $5, (%rdx), %rax # sched: [6:0.50]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rorx_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50]
-; HASWELL-NEXT: rorxq $5, (%rdx), %rax # sched: [6:0.50]
-; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rorx_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50]
-; BROADWELL-NEXT: rorxq $5, (%rdx), %rax # sched: [6:0.50]
-; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rorx_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50]
-; SKYLAKE-NEXT: rorxq $5, (%rdx), %rax # sched: [6:0.50]
-; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_rorx_i64:
-; KNL: # %bb.0:
-; KNL-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.50]
-; KNL-NEXT: rorxq $5, (%rdx), %rax # sched: [6:0.50]
-; KNL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_rorx_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: rorxq $5, (%rdx), %rax # sched: [5:0.50]
-; ZNVER1-NEXT: rorxq $5, %rdi, %rcx # sched: [1:0.25]
-; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a2
- %2 = lshr i64 %a0, 5
- %3 = shl i64 %a0, 59
- %4 = or i64 %2, %3
- %5 = lshr i64 %1, 5
- %6 = shl i64 %1, 59
- %7 = or i64 %5, %6
- %8 = add i64 %4, %7
- ret i64 %8
-}
-
-define i32 @test_sarx_i32(i32 %a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: test_sarx_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50]
-; GENERIC-NEXT: sarxl %esi, (%rdx), %eax # sched: [6:0.50]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_sarx_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50]
-; HASWELL-NEXT: sarxl %esi, (%rdx), %eax # sched: [6:0.50]
-; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sarx_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50]
-; BROADWELL-NEXT: sarxl %esi, (%rdx), %eax # sched: [6:0.50]
-; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sarx_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50]
-; SKYLAKE-NEXT: sarxl %esi, (%rdx), %eax # sched: [6:0.50]
-; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_sarx_i32:
-; KNL: # %bb.0:
-; KNL-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.50]
-; KNL-NEXT: sarxl %esi, (%rdx), %eax # sched: [6:0.50]
-; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_sarx_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: sarxl %esi, (%rdx), %eax # sched: [5:0.50]
-; ZNVER1-NEXT: sarxl %esi, %edi, %ecx # sched: [1:0.25]
-; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i32, i32 *%a2
- %2 = ashr i32 %a0, %a1
- %3 = ashr i32 %1, %a1
- %4 = add i32 %2, %3
- ret i32 %4
-}
-
-define i64 @test_sarx_i64(i64 %a0, i64 %a1, i64 *%a2) {
-; GENERIC-LABEL: test_sarx_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; GENERIC-NEXT: sarxq %rsi, (%rdx), %rax # sched: [6:0.50]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_sarx_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; HASWELL-NEXT: sarxq %rsi, (%rdx), %rax # sched: [6:0.50]
-; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sarx_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; BROADWELL-NEXT: sarxq %rsi, (%rdx), %rax # sched: [6:0.50]
-; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sarx_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; SKYLAKE-NEXT: sarxq %rsi, (%rdx), %rax # sched: [6:0.50]
-; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_sarx_i64:
-; KNL: # %bb.0:
-; KNL-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; KNL-NEXT: sarxq %rsi, (%rdx), %rax # sched: [6:0.50]
-; KNL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_sarx_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: sarxq %rsi, (%rdx), %rax # sched: [5:0.50]
-; ZNVER1-NEXT: sarxq %rsi, %rdi, %rcx # sched: [1:0.25]
-; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a2
- %2 = ashr i64 %a0, %a1
- %3 = ashr i64 %1, %a1
- %4 = add i64 %2, %3
- ret i64 %4
-}
-
-define i32 @test_shlx_i32(i32 %a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: test_shlx_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50]
-; GENERIC-NEXT: shlxl %esi, (%rdx), %eax # sched: [6:0.50]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_shlx_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50]
-; HASWELL-NEXT: shlxl %esi, (%rdx), %eax # sched: [6:0.50]
-; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_shlx_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50]
-; BROADWELL-NEXT: shlxl %esi, (%rdx), %eax # sched: [6:0.50]
-; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_shlx_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50]
-; SKYLAKE-NEXT: shlxl %esi, (%rdx), %eax # sched: [6:0.50]
-; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_shlx_i32:
-; KNL: # %bb.0:
-; KNL-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.50]
-; KNL-NEXT: shlxl %esi, (%rdx), %eax # sched: [6:0.50]
-; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_shlx_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: shlxl %esi, (%rdx), %eax # sched: [5:0.50]
-; ZNVER1-NEXT: shlxl %esi, %edi, %ecx # sched: [1:0.25]
-; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i32, i32 *%a2
- %2 = shl i32 %a0, %a1
- %3 = shl i32 %1, %a1
- %4 = add i32 %2, %3
- ret i32 %4
-}
-
-define i64 @test_shlx_i64(i64 %a0, i64 %a1, i64 *%a2) {
-; GENERIC-LABEL: test_shlx_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; GENERIC-NEXT: shlxq %rsi, (%rdx), %rax # sched: [6:0.50]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_shlx_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; HASWELL-NEXT: shlxq %rsi, (%rdx), %rax # sched: [6:0.50]
-; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_shlx_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; BROADWELL-NEXT: shlxq %rsi, (%rdx), %rax # sched: [6:0.50]
-; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_shlx_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; SKYLAKE-NEXT: shlxq %rsi, (%rdx), %rax # sched: [6:0.50]
-; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_shlx_i64:
-; KNL: # %bb.0:
-; KNL-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; KNL-NEXT: shlxq %rsi, (%rdx), %rax # sched: [6:0.50]
-; KNL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_shlx_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: shlxq %rsi, (%rdx), %rax # sched: [5:0.50]
-; ZNVER1-NEXT: shlxq %rsi, %rdi, %rcx # sched: [1:0.25]
-; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a2
- %2 = shl i64 %a0, %a1
- %3 = shl i64 %1, %a1
- %4 = add i64 %2, %3
- ret i64 %4
-}
-
-define i32 @test_shrx_i32(i32 %a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: test_shrx_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50]
-; GENERIC-NEXT: shrxl %esi, (%rdx), %eax # sched: [6:0.50]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_shrx_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50]
-; HASWELL-NEXT: shrxl %esi, (%rdx), %eax # sched: [6:0.50]
-; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_shrx_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50]
-; BROADWELL-NEXT: shrxl %esi, (%rdx), %eax # sched: [6:0.50]
-; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_shrx_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50]
-; SKYLAKE-NEXT: shrxl %esi, (%rdx), %eax # sched: [6:0.50]
-; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_shrx_i32:
-; KNL: # %bb.0:
-; KNL-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.50]
-; KNL-NEXT: shrxl %esi, (%rdx), %eax # sched: [6:0.50]
-; KNL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_shrx_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: shrxl %esi, (%rdx), %eax # sched: [5:0.50]
-; ZNVER1-NEXT: shrxl %esi, %edi, %ecx # sched: [1:0.25]
-; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i32, i32 *%a2
- %2 = lshr i32 %a0, %a1
- %3 = lshr i32 %1, %a1
- %4 = add i32 %2, %3
- ret i32 %4
-}
-
-define i64 @test_shrx_i64(i64 %a0, i64 %a1, i64 *%a2) {
-; GENERIC-LABEL: test_shrx_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; GENERIC-NEXT: shrxq %rsi, (%rdx), %rax # sched: [6:0.50]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_shrx_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; HASWELL-NEXT: shrxq %rsi, (%rdx), %rax # sched: [6:0.50]
-; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_shrx_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; BROADWELL-NEXT: shrxq %rsi, (%rdx), %rax # sched: [6:0.50]
-; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_shrx_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; SKYLAKE-NEXT: shrxq %rsi, (%rdx), %rax # sched: [6:0.50]
-; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_shrx_i64:
-; KNL: # %bb.0:
-; KNL-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.50]
-; KNL-NEXT: shrxq %rsi, (%rdx), %rax # sched: [6:0.50]
-; KNL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_shrx_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: shrxq %rsi, (%rdx), %rax # sched: [5:0.50]
-; ZNVER1-NEXT: shrxq %rsi, %rdi, %rcx # sched: [1:0.25]
-; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a2
- %2 = lshr i64 %a0, %a1
- %3 = lshr i64 %1, %a1
- %4 = add i64 %2, %3
- ret i64 %4
-}
diff --git a/llvm/test/CodeGen/X86/clflushopt-schedule.ll b/llvm/test/CodeGen/X86/clflushopt-schedule.ll
deleted file mode 100644
index 14b4551cabc..00000000000
--- a/llvm/test/CodeGen/X86/clflushopt-schedule.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+clflushopt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=GLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define void @clflushopt(i8* %p) nounwind {
-; GENERIC-LABEL: clflushopt:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: clflushopt (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GLM-LABEL: clflushopt:
-; GLM: # %bb.0:
-; GLM-NEXT: clflushopt (%rdi) # sched: [3:1.00]
-; GLM-NEXT: retq # sched: [4:1.00]
-;
-; SKYLAKE-LABEL: clflushopt:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: clflushopt (%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: clflushopt:
-; SKX: # %bb.0:
-; SKX-NEXT: clflushopt (%rdi) # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: clflushopt:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: clflushopt (%rdi) # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void @llvm.x86.clflushopt(i8* %p)
- ret void
-}
-declare void @llvm.x86.clflushopt(i8*) nounwind
diff --git a/llvm/test/CodeGen/X86/clwb-schedule.ll b/llvm/test/CodeGen/X86/clwb-schedule.ll
deleted file mode 100644
index 4d8e3e0c8ae..00000000000
--- a/llvm/test/CodeGen/X86/clwb-schedule.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+clwb | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX
-
-define void @clwb(i8* %a0) nounwind {
-; GENERIC-LABEL: clwb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: clwb (%rdi) # sched: [5:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKX-LABEL: clwb:
-; SKX: # %bb.0:
-; SKX-NEXT: clwb (%rdi) # sched: [5:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
- tail call void @llvm.x86.clwb(i8* %a0)
- ret void
-}
-declare void @llvm.x86.clwb(i8*) nounwind
diff --git a/llvm/test/CodeGen/X86/clzero-schedule.ll b/llvm/test/CodeGen/X86/clzero-schedule.ll
deleted file mode 100644
index 692c261c6e2..00000000000
--- a/llvm/test/CodeGen/X86/clzero-schedule.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+clzero | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=ZNVER1
-
-define void @test_clzero(i8* %p) {
-; GENERIC-LABEL: test_clzero:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; GENERIC-NEXT: clzero # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ZNVER1-LABEL: test_clzero:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25]
-; ZNVER1-NEXT: clzero # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void @llvm.x86.clzero(i8* %p)
- ret void
-}
-declare void @llvm.x86.clzero(i8*)
diff --git a/llvm/test/CodeGen/X86/cmov-schedule.ll b/llvm/test/CodeGen/X86/cmov-schedule.ll
deleted file mode 100644
index de3e8637a18..00000000000
--- a/llvm/test/CodeGen/X86/cmov-schedule.ll
+++ /dev/null
@@ -1,2203 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define void @test_cmov_16(i16 %a0, i16 %a1, i16 *%a2) optsize {
-; GENERIC-LABEL: test_cmov_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: cmovow %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovnow %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovbw %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovbw %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovbw %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovaew %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovaew %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovaew %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovew %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovew %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovnew %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovnew %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovbew %si, %di # sched: [3:1.00]
-; GENERIC-NEXT: cmovbew %si, %di # sched: [3:1.00]
-; GENERIC-NEXT: cmovaw %si, %di # sched: [3:1.00]
-; GENERIC-NEXT: cmovaw %si, %di # sched: [3:1.00]
-; GENERIC-NEXT: cmovsw %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovnsw %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovpw %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovpw %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovnpw %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovnpw %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovlw %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovlw %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovgew %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovgew %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovlew %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovlew %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovgw %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovgw %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: cmovow (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovnow (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovbw (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovbw (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovbw (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovaew (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovaew (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovaew (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovew (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovew (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovnew (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovnew (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovbew (%rdx), %di # sched: [8:1.00]
-; GENERIC-NEXT: cmovbew (%rdx), %di # sched: [8:1.00]
-; GENERIC-NEXT: cmovaw (%rdx), %di # sched: [8:1.00]
-; GENERIC-NEXT: cmovaw (%rdx), %di # sched: [8:1.00]
-; GENERIC-NEXT: cmovsw (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovnsw (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovpw (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovpw (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovnpw (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovnpw (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovlw (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovlw (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovgew (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovgew (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovlew (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovlew (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovgw (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: cmovgw (%rdx), %di # sched: [7:0.67]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmov_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: cmovow %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovnow %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovew %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovew %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovnew %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovnew %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovbew %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovbew %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovaw %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovaw %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovsw %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovnsw %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovpw %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovpw %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovnpw %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovnpw %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovlw %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovlw %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovgew %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovgew %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovlew %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovlew %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovgw %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovgw %si, %di # sched: [1:0.50]
-; ATOM-NEXT: cmovow (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovnow (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovbw (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovbw (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovbw (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovaew (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovaew (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovaew (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovew (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovew (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovnew (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovnew (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovbew (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovbew (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovaw (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovaw (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovsw (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovnsw (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovpw (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovpw (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovnpw (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovnpw (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovlw (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovlw (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovgew (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovgew (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovlew (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovlew (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovgw (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: cmovgw (%rdx), %di # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmov_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: cmovow %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovnow %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovbw %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovbw %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovbw %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovaew %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovaew %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovaew %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovew %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovew %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovnew %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovnew %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovbew %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovbew %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovaw %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovaw %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovsw %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovnsw %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovpw %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovpw %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovnpw %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovnpw %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovlw %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovlw %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovgew %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovgew %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovlew %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovlew %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovgw %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovgw %si, %di # sched: [2:1.00]
-; SLM-NEXT: cmovow (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovnow (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovbw (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovbw (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovbw (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovaew (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovaew (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovaew (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovew (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovew (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovnew (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovnew (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovbew (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovbew (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovaw (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovaw (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovsw (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovnsw (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovpw (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovpw (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovnpw (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovnpw (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovlw (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovlw (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovgew (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovgew (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovlew (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovlew (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovgw (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: cmovgw (%rdx), %di # sched: [5:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cmov_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: cmovow %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovnow %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovbw %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovbw %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovbw %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovaew %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovaew %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovaew %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovew %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovew %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovnew %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovnew %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovbew %si, %di # sched: [3:1.00]
-; SANDY-NEXT: cmovbew %si, %di # sched: [3:1.00]
-; SANDY-NEXT: cmovaw %si, %di # sched: [3:1.00]
-; SANDY-NEXT: cmovaw %si, %di # sched: [3:1.00]
-; SANDY-NEXT: cmovsw %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovnsw %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovpw %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovpw %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovnpw %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovnpw %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovlw %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovlw %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovgew %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovgew %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovlew %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovlew %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovgw %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovgw %si, %di # sched: [2:0.67]
-; SANDY-NEXT: cmovow (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovnow (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovbw (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovbw (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovbw (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovaew (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovaew (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovaew (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovew (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovew (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovnew (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovnew (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovbew (%rdx), %di # sched: [8:1.00]
-; SANDY-NEXT: cmovbew (%rdx), %di # sched: [8:1.00]
-; SANDY-NEXT: cmovaw (%rdx), %di # sched: [8:1.00]
-; SANDY-NEXT: cmovaw (%rdx), %di # sched: [8:1.00]
-; SANDY-NEXT: cmovsw (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovnsw (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovpw (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovpw (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovnpw (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovnpw (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovlw (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovlw (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovgew (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovgew (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovlew (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovlew (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovgw (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: cmovgw (%rdx), %di # sched: [7:0.67]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cmov_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: cmovow %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovnow %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovbw %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovbw %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovbw %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovaew %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovaew %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovaew %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovew %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovew %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovnew %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovnew %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovbew %si, %di # sched: [3:0.75]
-; HASWELL-NEXT: cmovbew %si, %di # sched: [3:0.75]
-; HASWELL-NEXT: cmovaw %si, %di # sched: [3:0.75]
-; HASWELL-NEXT: cmovaw %si, %di # sched: [3:0.75]
-; HASWELL-NEXT: cmovsw %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovnsw %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovpw %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovpw %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovnpw %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovnpw %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovlw %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovlw %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovgew %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovgew %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovlew %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovlew %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovgw %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovgw %si, %di # sched: [2:0.50]
-; HASWELL-NEXT: cmovow (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovnow (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovbw (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovbw (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovbw (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovaew (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovaew (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovaew (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovew (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovew (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovnew (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovnew (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovbew (%rdx), %di # sched: [8:0.75]
-; HASWELL-NEXT: cmovbew (%rdx), %di # sched: [8:0.75]
-; HASWELL-NEXT: cmovaw (%rdx), %di # sched: [8:0.75]
-; HASWELL-NEXT: cmovaw (%rdx), %di # sched: [8:0.75]
-; HASWELL-NEXT: cmovsw (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovnsw (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovpw (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovpw (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovnpw (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovnpw (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovlw (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovlw (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovgew (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovgew (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovlew (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovlew (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovgw (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: cmovgw (%rdx), %di # sched: [7:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmov_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: cmovow %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovnow %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovew %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovew %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovnew %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovnew %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovbew %si, %di # sched: [2:0.50]
-; BROADWELL-NEXT: cmovbew %si, %di # sched: [2:0.50]
-; BROADWELL-NEXT: cmovaw %si, %di # sched: [2:0.50]
-; BROADWELL-NEXT: cmovaw %si, %di # sched: [2:0.50]
-; BROADWELL-NEXT: cmovsw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovnsw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovpw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovpw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovnpw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovnpw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovlw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovlw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovgew %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovgew %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovlew %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovlew %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovgw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovgw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: cmovow (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovnow (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovbw (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovbw (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovbw (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovaew (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovaew (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovaew (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovew (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovew (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovnew (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovnew (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovbew (%rdx), %di # sched: [7:0.50]
-; BROADWELL-NEXT: cmovbew (%rdx), %di # sched: [7:0.50]
-; BROADWELL-NEXT: cmovaw (%rdx), %di # sched: [7:0.50]
-; BROADWELL-NEXT: cmovaw (%rdx), %di # sched: [7:0.50]
-; BROADWELL-NEXT: cmovsw (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovnsw (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovpw (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovpw (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovnpw (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovnpw (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovlw (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovlw (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovgew (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovgew (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovlew (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovlew (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovgw (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: cmovgw (%rdx), %di # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmov_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: cmovow %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovnow %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovew %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovew %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovnew %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovnew %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovbew %si, %di # sched: [2:1.00]
-; SKYLAKE-NEXT: cmovbew %si, %di # sched: [2:1.00]
-; SKYLAKE-NEXT: cmovaw %si, %di # sched: [2:1.00]
-; SKYLAKE-NEXT: cmovaw %si, %di # sched: [2:1.00]
-; SKYLAKE-NEXT: cmovsw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovnsw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovpw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovpw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovnpw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovnpw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovlw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovlw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovgew %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovgew %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovlew %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovlew %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovgw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovgw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovow (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovnow (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovbw (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovbw (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovbw (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovaew (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovaew (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovaew (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovew (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovew (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovnew (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovnew (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovbew (%rdx), %di # sched: [7:1.00]
-; SKYLAKE-NEXT: cmovbew (%rdx), %di # sched: [7:1.00]
-; SKYLAKE-NEXT: cmovaw (%rdx), %di # sched: [7:1.00]
-; SKYLAKE-NEXT: cmovaw (%rdx), %di # sched: [7:1.00]
-; SKYLAKE-NEXT: cmovsw (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovnsw (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovpw (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovpw (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovnpw (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovnpw (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovlw (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovlw (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovgew (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovgew (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovlew (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovlew (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovgw (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovgw (%rdx), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmov_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: cmovow %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovnow %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovew %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovew %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovnew %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovnew %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovbew %si, %di # sched: [2:1.00]
-; SKX-NEXT: cmovbew %si, %di # sched: [2:1.00]
-; SKX-NEXT: cmovaw %si, %di # sched: [2:1.00]
-; SKX-NEXT: cmovaw %si, %di # sched: [2:1.00]
-; SKX-NEXT: cmovsw %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovnsw %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovpw %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovpw %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovnpw %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovnpw %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovlw %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovlw %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovgew %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovgew %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovlew %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovlew %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovgw %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovgw %si, %di # sched: [1:0.50]
-; SKX-NEXT: cmovow (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovnow (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovbw (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovbw (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovbw (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovaew (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovaew (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovaew (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovew (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovew (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovnew (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovnew (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovbew (%rdx), %di # sched: [7:1.00]
-; SKX-NEXT: cmovbew (%rdx), %di # sched: [7:1.00]
-; SKX-NEXT: cmovaw (%rdx), %di # sched: [7:1.00]
-; SKX-NEXT: cmovaw (%rdx), %di # sched: [7:1.00]
-; SKX-NEXT: cmovsw (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovnsw (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovpw (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovpw (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovnpw (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovnpw (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovlw (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovlw (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovgew (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovgew (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovlew (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovlew (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovgw (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: cmovgw (%rdx), %di # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cmov_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: cmovow %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovnow %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovew %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovew %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovnew %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovnew %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovbew %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovbew %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovaw %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovaw %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovsw %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovnsw %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovpw %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovpw %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovnpw %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovnpw %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovlw %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovlw %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovgew %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovgew %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovlew %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovlew %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovgw %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovgw %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmovow (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovnow (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovew (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovew (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovnew (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovnew (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovbew (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovbew (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovaw (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovaw (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovsw (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovnsw (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovpw (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovpw (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovnpw (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovnpw (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovlw (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovlw (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovgew (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovgew (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovlew (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovlew (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovgw (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: cmovgw (%rdx), %di # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cmov_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: cmovow %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovnow %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovbw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovaew %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovew %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovew %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovnew %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovnew %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovbew %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovbew %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovaw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovaw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovsw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovnsw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovpw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovpw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovnpw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovnpw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovlw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovlw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovgew %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovgew %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovlew %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovlew %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovgw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovgw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmovow (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovnow (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovbw (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovbw (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovbw (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovaew (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovaew (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovaew (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovew (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovew (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovnew (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovnew (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovbew (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovbew (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovaw (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovaw (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovsw (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovnsw (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovpw (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovpw (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovnpw (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovnpw (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovlw (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovlw (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovgew (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovgew (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovlew (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovlew (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovgw (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: cmovgw (%rdx), %di # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cmov_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: cmovow %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovnow %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovbw %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovbw %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovbw %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovaew %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovaew %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovaew %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovew %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovew %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovnew %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovnew %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovbew %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovbew %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovaw %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovaw %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovsw %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovnsw %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovpw %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovpw %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovnpw %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovnpw %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovlw %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovlw %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovgew %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovgew %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovlew %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovlew %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovgw %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovgw %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmovow (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovnow (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovbw (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovbw (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovbw (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovaew (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovaew (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovaew (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovew (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovew (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovnew (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovnew (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovbew (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovbew (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovaw (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovaw (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovsw (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovnsw (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovpw (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovpw (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovnpw (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovnpw (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovlw (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovlw (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovgew (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovgew (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovlew (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovlew (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovgw (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: cmovgw (%rdx), %di # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "cmovow $1, $0 \0A\09 cmovnow $1, $0 \0A\09 cmovbw $1, $0 \0A\09 cmovcw $1, $0 \0A\09 cmovnaew $1, $0 \0A\09 cmovnbw $1, $0 \0A\09 cmovncw $1, $0 \0A\09 cmovaew $1, $0 \0A\09 cmovzw $1, $0 \0A\09 cmovew $1, $0 \0A\09 cmovnzw $1, $0 \0A\09 cmovnew $1, $0 \0A\09 cmovbew $1, $0 \0A\09 cmovnaw $1, $0 \0A\09 cmovnbew $1, $0 \0A\09 cmovaw $1, $0 \0A\09 cmovsw $1, $0 \0A\09 cmovnsw $1, $0 \0A\09 cmovpw $1, $0 \0A\09 cmovpew $1, $0 \0A\09 cmovnpw $1, $0 \0A\09 cmovpow $1, $0 \0A\09 cmovlw $1, $0 \0A\09 cmovngew $1, $0 \0A\09 cmovnlw $1, $0 \0A\09 cmovgew $1, $0 \0A\09 cmovlew $1, $0 \0A\09 cmovngw $1, $0 \0A\09 cmovnlew $1, $0 \0A\09 cmovgw $1, $0 \0A\09 cmovow $2, $0 \0A\09 cmovnow $2, $0 \0A\09 cmovbw $2, $0 \0A\09 cmovcw $2, $0 \0A\09 cmovnaew $2, $0 \0A\09 cmovnbw $2, $0 \0A\09 cmovncw $2, $0 \0A\09 cmovaew $2, $0 \0A\09 cmovzw $2, $0 \0A\09 cmovew $2, $0 \0A\09 cmovnzw $2, $0 \0A\09 cmovnew $2, $0 \0A\09 cmovbew $2, $0 \0A\09 cmovnaw $2, $0 \0A\09 cmovnbew $2, $0 \0A\09 cmovaw $2, $0 \0A\09 cmovsw $2, $0 \0A\09 cmovnsw $2, $0 \0A\09 cmovpw $2, $0 \0A\09 cmovpew $2, $0 \0A\09 cmovnpw $2, $0 \0A\09 cmovpow $2, $0 \0A\09 cmovlw $2, $0 \0A\09 cmovngew $2, $0 \0A\09 cmovnlw $2, $0 \0A\09 cmovgew $2, $0 \0A\09 cmovlew $2, $0 \0A\09 cmovngw $2, $0 \0A\09 cmovnlew $2, $0 \0A\09 cmovgw $2, $0", "r,r,*m"(i16 %a0, i16 %a1, i16 *%a2)
- ret void
-}
-
-define void @test_cmov_32(i32 %a0, i32 %a1, i32 *%a2) optsize {
-; GENERIC-LABEL: test_cmov_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: cmovol %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovnol %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovbl %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovbl %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovbl %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovael %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovael %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovael %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovel %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovel %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovnel %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovnel %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovbel %esi, %edi # sched: [3:1.00]
-; GENERIC-NEXT: cmovbel %esi, %edi # sched: [3:1.00]
-; GENERIC-NEXT: cmoval %esi, %edi # sched: [3:1.00]
-; GENERIC-NEXT: cmoval %esi, %edi # sched: [3:1.00]
-; GENERIC-NEXT: cmovsl %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovnsl %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovpl %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovpl %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovnpl %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovnpl %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovll %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovll %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovgel %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovgel %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovlel %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovlel %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovgl %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovgl %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: cmovol (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovnol (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovael (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovael (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovael (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovel (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovel (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovnel (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovnel (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovbel (%rdx), %edi # sched: [8:1.00]
-; GENERIC-NEXT: cmovbel (%rdx), %edi # sched: [8:1.00]
-; GENERIC-NEXT: cmoval (%rdx), %edi # sched: [8:1.00]
-; GENERIC-NEXT: cmoval (%rdx), %edi # sched: [8:1.00]
-; GENERIC-NEXT: cmovsl (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovnsl (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovpl (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovpl (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovll (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovll (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovgel (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovgel (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovlel (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovlel (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovgl (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: cmovgl (%rdx), %edi # sched: [7:0.67]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmov_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: cmovol %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovnol %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovel %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovel %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovnel %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovnel %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovbel %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovbel %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmoval %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmoval %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovsl %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovnsl %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovpl %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovpl %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovnpl %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovnpl %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovll %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovll %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovgel %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovgel %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovlel %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovlel %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovgl %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovgl %esi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmovol (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovnol (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovbl (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovbl (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovbl (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovael (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovael (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovael (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovel (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovel (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovnel (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovnel (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovbel (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovbel (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmoval (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmoval (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovsl (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovnsl (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovpl (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovpl (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovnpl (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovnpl (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovll (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovll (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovgel (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovgel (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovlel (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovlel (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovgl (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: cmovgl (%rdx), %edi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmov_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: cmovol %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovnol %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovbl %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovbl %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovbl %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovael %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovael %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovael %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovel %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovel %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovnel %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovnel %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovbel %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovbel %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmoval %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmoval %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovsl %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovnsl %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovpl %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovpl %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovnpl %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovnpl %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovll %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovll %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovgel %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovgel %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovlel %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovlel %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovgl %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovgl %esi, %edi # sched: [2:1.00]
-; SLM-NEXT: cmovol (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovnol (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovbl (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovbl (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovbl (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovael (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovael (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovael (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovel (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovel (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovnel (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovnel (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovbel (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovbel (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmoval (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmoval (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovsl (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovnsl (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovpl (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovpl (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovnpl (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovnpl (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovll (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovll (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovgel (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovgel (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovlel (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovlel (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovgl (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: cmovgl (%rdx), %edi # sched: [5:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cmov_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: cmovol %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovnol %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovbl %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovbl %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovbl %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovael %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovael %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovael %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovel %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovel %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovnel %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovnel %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovbel %esi, %edi # sched: [3:1.00]
-; SANDY-NEXT: cmovbel %esi, %edi # sched: [3:1.00]
-; SANDY-NEXT: cmoval %esi, %edi # sched: [3:1.00]
-; SANDY-NEXT: cmoval %esi, %edi # sched: [3:1.00]
-; SANDY-NEXT: cmovsl %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovnsl %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovpl %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovpl %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovnpl %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovnpl %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovll %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovll %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovgel %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovgel %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovlel %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovlel %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovgl %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovgl %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: cmovol (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovnol (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovael (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovael (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovael (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovel (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovel (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovnel (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovnel (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovbel (%rdx), %edi # sched: [8:1.00]
-; SANDY-NEXT: cmovbel (%rdx), %edi # sched: [8:1.00]
-; SANDY-NEXT: cmoval (%rdx), %edi # sched: [8:1.00]
-; SANDY-NEXT: cmoval (%rdx), %edi # sched: [8:1.00]
-; SANDY-NEXT: cmovsl (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovnsl (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovpl (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovpl (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovll (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovll (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovgel (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovgel (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovlel (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovlel (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovgl (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: cmovgl (%rdx), %edi # sched: [7:0.67]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cmov_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: cmovol %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovnol %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovbl %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovbl %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovbl %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovael %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovael %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovael %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovel %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovel %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovnel %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovnel %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovbel %esi, %edi # sched: [3:0.75]
-; HASWELL-NEXT: cmovbel %esi, %edi # sched: [3:0.75]
-; HASWELL-NEXT: cmoval %esi, %edi # sched: [3:0.75]
-; HASWELL-NEXT: cmoval %esi, %edi # sched: [3:0.75]
-; HASWELL-NEXT: cmovsl %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovnsl %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovpl %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovpl %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovnpl %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovnpl %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovll %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovll %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovgel %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovgel %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovlel %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovlel %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovgl %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovgl %esi, %edi # sched: [2:0.50]
-; HASWELL-NEXT: cmovol (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovnol (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovbl (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovbl (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovbl (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovael (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovael (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovael (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovel (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovel (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovnel (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovnel (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovbel (%rdx), %edi # sched: [8:0.75]
-; HASWELL-NEXT: cmovbel (%rdx), %edi # sched: [8:0.75]
-; HASWELL-NEXT: cmoval (%rdx), %edi # sched: [8:0.75]
-; HASWELL-NEXT: cmoval (%rdx), %edi # sched: [8:0.75]
-; HASWELL-NEXT: cmovsl (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovnsl (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovpl (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovpl (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovll (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovll (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovgel (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovgel (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovlel (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovlel (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovgl (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: cmovgl (%rdx), %edi # sched: [7:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmov_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: cmovol %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovnol %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovel %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovel %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovnel %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovnel %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovbel %esi, %edi # sched: [2:0.50]
-; BROADWELL-NEXT: cmovbel %esi, %edi # sched: [2:0.50]
-; BROADWELL-NEXT: cmoval %esi, %edi # sched: [2:0.50]
-; BROADWELL-NEXT: cmoval %esi, %edi # sched: [2:0.50]
-; BROADWELL-NEXT: cmovsl %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovnsl %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovpl %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovpl %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovnpl %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovnpl %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovll %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovll %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovgel %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovgel %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovlel %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovlel %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovgl %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovgl %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovol (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovnol (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovael (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovael (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovael (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovel (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovel (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovnel (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovnel (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovbel (%rdx), %edi # sched: [7:0.50]
-; BROADWELL-NEXT: cmovbel (%rdx), %edi # sched: [7:0.50]
-; BROADWELL-NEXT: cmoval (%rdx), %edi # sched: [7:0.50]
-; BROADWELL-NEXT: cmoval (%rdx), %edi # sched: [7:0.50]
-; BROADWELL-NEXT: cmovsl (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovnsl (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovpl (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovpl (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovnpl (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovnpl (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovll (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovll (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovgel (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovgel (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovlel (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovlel (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovgl (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovgl (%rdx), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmov_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: cmovol %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovnol %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovel %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovel %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovnel %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovnel %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovbel %esi, %edi # sched: [2:1.00]
-; SKYLAKE-NEXT: cmovbel %esi, %edi # sched: [2:1.00]
-; SKYLAKE-NEXT: cmoval %esi, %edi # sched: [2:1.00]
-; SKYLAKE-NEXT: cmoval %esi, %edi # sched: [2:1.00]
-; SKYLAKE-NEXT: cmovsl %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovnsl %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovpl %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovpl %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovnpl %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovnpl %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovll %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovll %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovgel %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovgel %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovlel %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovlel %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovgl %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovgl %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovol (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovnol (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovael (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovael (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovael (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovel (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovel (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovnel (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovnel (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovbel (%rdx), %edi # sched: [7:1.00]
-; SKYLAKE-NEXT: cmovbel (%rdx), %edi # sched: [7:1.00]
-; SKYLAKE-NEXT: cmoval (%rdx), %edi # sched: [7:1.00]
-; SKYLAKE-NEXT: cmoval (%rdx), %edi # sched: [7:1.00]
-; SKYLAKE-NEXT: cmovsl (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovnsl (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovpl (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovpl (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovnpl (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovnpl (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovll (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovll (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovgel (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovgel (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovlel (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovlel (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovgl (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovgl (%rdx), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmov_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: cmovol %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovnol %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovel %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovel %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovnel %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovnel %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovbel %esi, %edi # sched: [2:1.00]
-; SKX-NEXT: cmovbel %esi, %edi # sched: [2:1.00]
-; SKX-NEXT: cmoval %esi, %edi # sched: [2:1.00]
-; SKX-NEXT: cmoval %esi, %edi # sched: [2:1.00]
-; SKX-NEXT: cmovsl %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovnsl %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovpl %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovpl %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovnpl %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovnpl %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovll %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovll %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovgel %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovgel %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovlel %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovlel %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovgl %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovgl %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: cmovol (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovnol (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovbl (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovael (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovael (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovael (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovel (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovel (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovnel (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovnel (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovbel (%rdx), %edi # sched: [7:1.00]
-; SKX-NEXT: cmovbel (%rdx), %edi # sched: [7:1.00]
-; SKX-NEXT: cmoval (%rdx), %edi # sched: [7:1.00]
-; SKX-NEXT: cmoval (%rdx), %edi # sched: [7:1.00]
-; SKX-NEXT: cmovsl (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovnsl (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovpl (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovpl (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovnpl (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovnpl (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovll (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovll (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovgel (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovgel (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovlel (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovlel (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovgl (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: cmovgl (%rdx), %edi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cmov_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: cmovol %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovnol %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovel %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovel %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovnel %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovnel %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovbel %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovbel %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmoval %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmoval %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovsl %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovnsl %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovpl %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovpl %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovnpl %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovnpl %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovll %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovll %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovgel %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovgel %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovlel %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovlel %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovgl %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovgl %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmovol (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovnol (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovel (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovel (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovnel (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovnel (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovbel (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovbel (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmoval (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmoval (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovsl (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovnsl (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovpl (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovpl (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovnpl (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovnpl (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovll (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovll (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovgel (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovgel (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovlel (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovlel (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovgl (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: cmovgl (%rdx), %edi # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cmov_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: cmovol %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovnol %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovbl %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovael %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovel %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovel %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovnel %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovnel %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovbel %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovbel %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmoval %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmoval %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovsl %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovnsl %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovpl %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovpl %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovnpl %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovnpl %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovll %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovll %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovgel %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovgel %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovlel %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovlel %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovgl %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovgl %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmovol (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovnol (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovbl (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovbl (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovbl (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovael (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovael (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovael (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovel (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovel (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovnel (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovnel (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovbel (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovbel (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmoval (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmoval (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovsl (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovnsl (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovpl (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovpl (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovnpl (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovnpl (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovll (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovll (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovgel (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovgel (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovlel (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovlel (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovgl (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: cmovgl (%rdx), %edi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cmov_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: cmovol %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovnol %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovbl %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovbl %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovbl %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovael %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovael %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovael %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovel %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovel %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovnel %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovnel %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovbel %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovbel %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmoval %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmoval %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovsl %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovnsl %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovpl %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovpl %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovnpl %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovnpl %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovll %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovll %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovgel %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovgel %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovlel %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovlel %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovgl %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovgl %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovol (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovnol (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovbl (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovbl (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovbl (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovael (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovael (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovael (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovel (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovel (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovnel (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovnel (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovbel (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovbel (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmoval (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmoval (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovsl (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovnsl (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovpl (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovpl (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovnpl (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovnpl (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovll (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovll (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovgel (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovgel (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovlel (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovlel (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovgl (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovgl (%rdx), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "cmovol $1, $0 \0A\09 cmovnol $1, $0 \0A\09 cmovbl $1, $0 \0A\09 cmovcl $1, $0 \0A\09 cmovnael $1, $0 \0A\09 cmovnbl $1, $0 \0A\09 cmovncl $1, $0 \0A\09 cmovael $1, $0 \0A\09 cmovzl $1, $0 \0A\09 cmovel $1, $0 \0A\09 cmovnzl $1, $0 \0A\09 cmovnel $1, $0 \0A\09 cmovbel $1, $0 \0A\09 cmovnal $1, $0 \0A\09 cmovnbel $1, $0 \0A\09 cmoval $1, $0 \0A\09 cmovsl $1, $0 \0A\09 cmovnsl $1, $0 \0A\09 cmovpl $1, $0 \0A\09 cmovpel $1, $0 \0A\09 cmovnpl $1, $0 \0A\09 cmovpol $1, $0 \0A\09 cmovll $1, $0 \0A\09 cmovngel $1, $0 \0A\09 cmovnll $1, $0 \0A\09 cmovgel $1, $0 \0A\09 cmovlel $1, $0 \0A\09 cmovngl $1, $0 \0A\09 cmovnlel $1, $0 \0A\09 cmovgl $1, $0 \0A\09 cmovol $2, $0 \0A\09 cmovnol $2, $0 \0A\09 cmovbl $2, $0 \0A\09 cmovcl $2, $0 \0A\09 cmovnael $2, $0 \0A\09 cmovnbl $2, $0 \0A\09 cmovncl $2, $0 \0A\09 cmovael $2, $0 \0A\09 cmovzl $2, $0 \0A\09 cmovel $2, $0 \0A\09 cmovnzl $2, $0 \0A\09 cmovnel $2, $0 \0A\09 cmovbel $2, $0 \0A\09 cmovnal $2, $0 \0A\09 cmovnbel $2, $0 \0A\09 cmoval $2, $0 \0A\09 cmovsl $2, $0 \0A\09 cmovnsl $2, $0 \0A\09 cmovpl $2, $0 \0A\09 cmovpel $2, $0 \0A\09 cmovnpl $2, $0 \0A\09 cmovpol $2, $0 \0A\09 cmovll $2, $0 \0A\09 cmovngel $2, $0 \0A\09 cmovnll $2, $0 \0A\09 cmovgel $2, $0 \0A\09 cmovlel $2, $0 \0A\09 cmovngl $2, $0 \0A\09 cmovnlel $2, $0 \0A\09 cmovgl $2, $0", "r,r,*m"(i32 %a0, i32 %a1, i32 *%a2)
- ret void
-}
-
-define void @test_cmov_64(i64 %a0, i64 %a1, i64 *%a2) optsize {
-; GENERIC-LABEL: test_cmov_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: cmovoq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovnoq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmoveq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmoveq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovneq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovneq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovbeq %rsi, %rdi # sched: [3:1.00]
-; GENERIC-NEXT: cmovbeq %rsi, %rdi # sched: [3:1.00]
-; GENERIC-NEXT: cmovaq %rsi, %rdi # sched: [3:1.00]
-; GENERIC-NEXT: cmovaq %rsi, %rdi # sched: [3:1.00]
-; GENERIC-NEXT: cmovsq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovnsq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovpq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovpq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovlq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovlq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovleq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovleq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovgq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovgq %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: cmovoq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovnoq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovbeq (%rdx), %rdi # sched: [8:1.00]
-; GENERIC-NEXT: cmovbeq (%rdx), %rdi # sched: [8:1.00]
-; GENERIC-NEXT: cmovaq (%rdx), %rdi # sched: [8:1.00]
-; GENERIC-NEXT: cmovaq (%rdx), %rdi # sched: [8:1.00]
-; GENERIC-NEXT: cmovsq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovnsq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmov_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: cmovoq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovnoq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovaq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovaq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovsq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovnsq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmovoq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovnoq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovbq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovbq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovbq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovaeq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovaeq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovaeq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmoveq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmoveq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovneq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovneq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovbeq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovbeq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovaq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovaq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovsq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovnsq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovpq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovpq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovnpq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovnpq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovlq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovlq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovgeq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovgeq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovleq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovleq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovgq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: cmovgq (%rdx), %rdi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmov_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: cmovoq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovnoq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovbq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovbq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovbq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovaeq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovaeq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovaeq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmoveq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmoveq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovneq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovneq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovbeq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovbeq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovaq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovaq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovsq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovnsq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovpq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovpq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovnpq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovnpq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovlq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovlq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovgeq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovgeq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovleq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovleq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovgq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovgq %rsi, %rdi # sched: [2:1.00]
-; SLM-NEXT: cmovoq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovnoq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovbq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovbq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovbq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovaeq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovaeq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovaeq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmoveq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmoveq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovneq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovneq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovbeq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovbeq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovaq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovaq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovsq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovnsq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovpq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovpq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovnpq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovnpq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovlq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovlq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovgeq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovgeq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovleq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovleq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovgq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: cmovgq (%rdx), %rdi # sched: [5:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cmov_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: cmovoq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovnoq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmoveq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmoveq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovneq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovneq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovbeq %rsi, %rdi # sched: [3:1.00]
-; SANDY-NEXT: cmovbeq %rsi, %rdi # sched: [3:1.00]
-; SANDY-NEXT: cmovaq %rsi, %rdi # sched: [3:1.00]
-; SANDY-NEXT: cmovaq %rsi, %rdi # sched: [3:1.00]
-; SANDY-NEXT: cmovsq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovnsq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovpq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovpq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovlq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovlq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovleq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovleq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovgq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovgq %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: cmovoq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovnoq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovbeq (%rdx), %rdi # sched: [8:1.00]
-; SANDY-NEXT: cmovbeq (%rdx), %rdi # sched: [8:1.00]
-; SANDY-NEXT: cmovaq (%rdx), %rdi # sched: [8:1.00]
-; SANDY-NEXT: cmovaq (%rdx), %rdi # sched: [8:1.00]
-; SANDY-NEXT: cmovsq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovnsq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.67]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cmov_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: cmovoq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovnoq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovbq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovbq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovbq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmoveq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmoveq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovneq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovneq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovbeq %rsi, %rdi # sched: [3:0.75]
-; HASWELL-NEXT: cmovbeq %rsi, %rdi # sched: [3:0.75]
-; HASWELL-NEXT: cmovaq %rsi, %rdi # sched: [3:0.75]
-; HASWELL-NEXT: cmovaq %rsi, %rdi # sched: [3:0.75]
-; HASWELL-NEXT: cmovsq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovnsq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovpq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovpq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovlq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovlq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovleq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovleq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovgq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovgq %rsi, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: cmovoq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovnoq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovbeq (%rdx), %rdi # sched: [8:0.75]
-; HASWELL-NEXT: cmovbeq (%rdx), %rdi # sched: [8:0.75]
-; HASWELL-NEXT: cmovaq (%rdx), %rdi # sched: [8:0.75]
-; HASWELL-NEXT: cmovaq (%rdx), %rdi # sched: [8:0.75]
-; HASWELL-NEXT: cmovsq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovnsq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmov_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: cmovoq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovnoq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovbeq %rsi, %rdi # sched: [2:0.50]
-; BROADWELL-NEXT: cmovbeq %rsi, %rdi # sched: [2:0.50]
-; BROADWELL-NEXT: cmovaq %rsi, %rdi # sched: [2:0.50]
-; BROADWELL-NEXT: cmovaq %rsi, %rdi # sched: [2:0.50]
-; BROADWELL-NEXT: cmovsq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovnsq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: cmovoq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovnoq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmoveq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmoveq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovneq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovneq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovbeq (%rdx), %rdi # sched: [7:0.50]
-; BROADWELL-NEXT: cmovbeq (%rdx), %rdi # sched: [7:0.50]
-; BROADWELL-NEXT: cmovaq (%rdx), %rdi # sched: [7:0.50]
-; BROADWELL-NEXT: cmovaq (%rdx), %rdi # sched: [7:0.50]
-; BROADWELL-NEXT: cmovsq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovnsq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovpq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovpq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovnpq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovnpq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovlq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovlq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovgeq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovgeq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovleq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovleq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovgq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: cmovgq (%rdx), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmov_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: cmovoq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovnoq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovbeq %rsi, %rdi # sched: [2:1.00]
-; SKYLAKE-NEXT: cmovbeq %rsi, %rdi # sched: [2:1.00]
-; SKYLAKE-NEXT: cmovaq %rsi, %rdi # sched: [2:1.00]
-; SKYLAKE-NEXT: cmovaq %rsi, %rdi # sched: [2:1.00]
-; SKYLAKE-NEXT: cmovsq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovnsq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: cmovoq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovnoq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmoveq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmoveq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovneq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovneq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovbeq (%rdx), %rdi # sched: [7:1.00]
-; SKYLAKE-NEXT: cmovbeq (%rdx), %rdi # sched: [7:1.00]
-; SKYLAKE-NEXT: cmovaq (%rdx), %rdi # sched: [7:1.00]
-; SKYLAKE-NEXT: cmovaq (%rdx), %rdi # sched: [7:1.00]
-; SKYLAKE-NEXT: cmovsq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovnsq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovpq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovpq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovnpq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovnpq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovlq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovlq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovgeq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovgeq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovleq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovleq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovgq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: cmovgq (%rdx), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmov_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: cmovoq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovnoq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovbeq %rsi, %rdi # sched: [2:1.00]
-; SKX-NEXT: cmovbeq %rsi, %rdi # sched: [2:1.00]
-; SKX-NEXT: cmovaq %rsi, %rdi # sched: [2:1.00]
-; SKX-NEXT: cmovaq %rsi, %rdi # sched: [2:1.00]
-; SKX-NEXT: cmovsq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovnsq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: cmovoq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovnoq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovbq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovaeq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmoveq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmoveq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovneq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovneq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovbeq (%rdx), %rdi # sched: [7:1.00]
-; SKX-NEXT: cmovbeq (%rdx), %rdi # sched: [7:1.00]
-; SKX-NEXT: cmovaq (%rdx), %rdi # sched: [7:1.00]
-; SKX-NEXT: cmovaq (%rdx), %rdi # sched: [7:1.00]
-; SKX-NEXT: cmovsq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovnsq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovpq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovpq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovnpq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovnpq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovlq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovlq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovgeq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovgeq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovleq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovleq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovgq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: cmovgq (%rdx), %rdi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cmov_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: cmovoq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovnoq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovaq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovaq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovsq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovnsq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmovoq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovnoq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmoveq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmoveq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovneq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovneq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovaq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovaq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovsq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovnsq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovpq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovpq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovlq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovlq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovleq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovleq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovgq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: cmovgq (%rdx), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cmov_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: cmovoq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovnoq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovbq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmoveq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovneq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovaq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovaq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovsq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovnsq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovpq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovlq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovleq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovgq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmovoq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovnoq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovbq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovbq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovbq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmoveq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmoveq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovneq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovneq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovaq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovaq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovsq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovnsq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovpq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovpq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovlq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovlq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovleq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovleq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovgq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: cmovgq (%rdx), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cmov_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: cmovoq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovnoq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovbq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovbq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovbq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovaeq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmoveq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmoveq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovneq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovneq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovbeq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovaq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovaq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovsq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovnsq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovpq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovpq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovnpq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovlq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovlq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovgeq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovleq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovleq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovgq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovgq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmovoq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovnoq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovbq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovbq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovbq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovaeq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovaeq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovaeq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmoveq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmoveq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovneq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovneq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovbeq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovbeq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovaq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovaq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovsq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovnsq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovpq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovpq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovnpq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovnpq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovlq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovlq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovgeq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovgeq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovleq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovleq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovgq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: cmovgq (%rdx), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "cmovoq $1, $0 \0A\09 cmovnoq $1, $0 \0A\09 cmovbq $1, $0 \0A\09 cmovcq $1, $0 \0A\09 cmovnaeq $1, $0 \0A\09 cmovnbq $1, $0 \0A\09 cmovncq $1, $0 \0A\09 cmovaeq $1, $0 \0A\09 cmovzq $1, $0 \0A\09 cmoveq $1, $0 \0A\09 cmovnzq $1, $0 \0A\09 cmovneq $1, $0 \0A\09 cmovbeq $1, $0 \0A\09 cmovnaq $1, $0 \0A\09 cmovnbeq $1, $0 \0A\09 cmovaq $1, $0 \0A\09 cmovsq $1, $0 \0A\09 cmovnsq $1, $0 \0A\09 cmovpq $1, $0 \0A\09 cmovpeq $1, $0 \0A\09 cmovnpq $1, $0 \0A\09 cmovpoq $1, $0 \0A\09 cmovlq $1, $0 \0A\09 cmovngeq $1, $0 \0A\09 cmovnlq $1, $0 \0A\09 cmovgeq $1, $0 \0A\09 cmovleq $1, $0 \0A\09 cmovngq $1, $0 \0A\09 cmovnleq $1, $0 \0A\09 cmovgq $1, $0 \0A\09 cmovoq $2, $0 \0A\09 cmovnoq $2, $0 \0A\09 cmovbq $2, $0 \0A\09 cmovcq $2, $0 \0A\09 cmovnaeq $2, $0 \0A\09 cmovnbq $2, $0 \0A\09 cmovncq $2, $0 \0A\09 cmovaeq $2, $0 \0A\09 cmovzq $2, $0 \0A\09 cmoveq $2, $0 \0A\09 cmovnzq $2, $0 \0A\09 cmovneq $2, $0 \0A\09 cmovbeq $2, $0 \0A\09 cmovnaq $2, $0 \0A\09 cmovnbeq $2, $0 \0A\09 cmovaq $2, $0 \0A\09 cmovsq $2, $0 \0A\09 cmovnsq $2, $0 \0A\09 cmovpq $2, $0 \0A\09 cmovpeq $2, $0 \0A\09 cmovnpq $2, $0 \0A\09 cmovpoq $2, $0 \0A\09 cmovlq $2, $0 \0A\09 cmovngeq $2, $0 \0A\09 cmovnlq $2, $0 \0A\09 cmovgeq $2, $0 \0A\09 cmovleq $2, $0 \0A\09 cmovngq $2, $0 \0A\09 cmovnleq $2, $0 \0A\09 cmovgq $2, $0", "r,r,*m"(i64 %a0, i64 %a1, i64 *%a2)
- ret void
-}
diff --git a/llvm/test/CodeGen/X86/f16c-schedule.ll b/llvm/test/CodeGen/X86/f16c-schedule.ll
deleted file mode 100644
index b4d73ce9e5a..00000000000
--- a/llvm/test/CodeGen/X86/f16c-schedule.ll
+++ /dev/null
@@ -1,255 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+f16c | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=IVY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) {
-; GENERIC-LABEL: test_vcvtph2ps_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
-; GENERIC-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; IVY-LABEL: test_vcvtph2ps_128:
-; IVY: # %bb.0:
-; IVY-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
-; IVY-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
-; IVY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; IVY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_vcvtph2ps_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [6:1.00]
-; HASWELL-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vcvtph2ps_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [6:1.00]
-; BROADWELL-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vcvtph2ps_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [9:0.50]
-; SKYLAKE-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_vcvtph2ps_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [13:1.00]
-; BDVER2-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [8:1.00]
-; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_vcvtph2ps_128:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_vcvtph2ps_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [100:0.25]
-; ZNVER1-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <8 x i16>, <8 x i16> *%a1
- %2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
- %3 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0)
- %4 = fadd <4 x float> %2, %3
- ret <4 x float> %4
-}
-declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>)
-
-define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) {
-; GENERIC-LABEL: test_vcvtph2ps_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00]
-; GENERIC-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; IVY-LABEL: test_vcvtph2ps_256:
-; IVY: # %bb.0:
-; IVY-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00]
-; IVY-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
-; IVY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; IVY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_vcvtph2ps_256:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00]
-; HASWELL-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00]
-; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vcvtph2ps_256:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [6:1.00]
-; BROADWELL-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00]
-; BROADWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vcvtph2ps_256:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [10:0.50]
-; SKYLAKE-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_vcvtph2ps_256:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [13:2.00]
-; BDVER2-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [8:2.00]
-; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_vcvtph2ps_256:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:2.00]
-; BTVER2-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_vcvtph2ps_256:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [100:0.25]
-; ZNVER1-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <8 x i16>, <8 x i16> *%a1
- %2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
- %3 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0)
- %4 = fadd <8 x float> %2, %3
- ret <8 x float> %4
-}
-declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>)
-
-define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16> *%a2) {
-; GENERIC-LABEL: test_vcvtps2ph_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; IVY-LABEL: test_vcvtps2ph_128:
-; IVY: # %bb.0:
-; IVY-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
-; IVY-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
-; IVY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_vcvtps2ph_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [5:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vcvtps2ph_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [5:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vcvtps2ph_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [6:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_vcvtps2ph_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [8:1.00]
-; BDVER2-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_vcvtps2ph_128:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_vcvtps2ph_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
- %2 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a1, i32 0)
- %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- store <4 x i16> %3, <4 x i16> *%a2
- ret <8 x i16> %1
-}
-declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32)
-
-define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_vcvtps2ph_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; IVY-LABEL: test_vcvtps2ph_256:
-; IVY: # %bb.0:
-; IVY-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
-; IVY-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
-; IVY-NEXT: vzeroupper # sched: [1:1.00]
-; IVY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_vcvtps2ph_256:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
-; HASWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vcvtps2ph_256:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vcvtps2ph_256:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_vcvtps2ph_256:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [8:2.00]
-; BDVER2-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:2.00]
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_vcvtps2ph_256:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:2.00]
-; BTVER2-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_vcvtps2ph_256:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
- %2 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a1, i32 0)
- store <8 x i16> %2, <8 x i16> *%a2
- ret <8 x i16> %1
-}
-declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32)
diff --git a/llvm/test/CodeGen/X86/fma-schedule.ll b/llvm/test/CodeGen/X86/fma-schedule.ll
deleted file mode 100644
index ce8e00e5bc0..00000000000
--- a/llvm/test/CodeGen/X86/fma-schedule.ll
+++ /dev/null
@@ -1,3317 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-;
-; VFMADD
-;
-
-define void @test_vfmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddpd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; GENERIC-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmaddpd_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; BDVER2-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmaddpd_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
-; HASWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
-; HASWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmaddpd_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; BROADWELL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmaddpd_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmaddpd_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; KNL-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
-; KNL-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
-; KNL-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmaddpd_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
-; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmaddpd_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmadd132pd $2, $1, $0 \0A\09 vfmadd213pd $2, $1, $0 \0A\09 vfmadd231pd $2, $1, $0 \0A\09 vfmadd132pd $3, $1, $0 \0A\09 vfmadd213pd $3, $1, $0 \0A\09 vfmadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddpd_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
-; GENERIC-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmaddpd_256:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:1.00]
-; BDVER2-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:1.00]
-; BDVER2-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmaddpd_256:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
-; HASWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
-; HASWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmaddpd_256:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
-; BROADWELL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
-; BROADWELL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmaddpd_256:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
-; SKYLAKE-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
-; SKYLAKE-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmaddpd_256:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; KNL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; KNL-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
-; KNL-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
-; KNL-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmaddpd_256:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
-; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
-; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
-; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmaddpd_256:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmadd132pd $2, $1, $0 \0A\09 vfmadd213pd $2, $1, $0 \0A\09 vfmadd231pd $2, $1, $0 \0A\09 vfmadd132pd $3, $1, $0 \0A\09 vfmadd213pd $3, $1, $0 \0A\09 vfmadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddps_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; GENERIC-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmaddps_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; BDVER2-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmaddps_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
-; HASWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
-; HASWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmaddps_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; BROADWELL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmaddps_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmaddps_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [11:0.50]
-; KNL-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [11:0.50]
-; KNL-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [11:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmaddps_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmaddps_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmadd132ps $2, $1, $0 \0A\09 vfmadd213ps $2, $1, $0 \0A\09 vfmadd231ps $2, $1, $0 \0A\09 vfmadd132ps $3, $1, $0 \0A\09 vfmadd213ps $3, $1, $0 \0A\09 vfmadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddps_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
-; GENERIC-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmaddps_256:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:1.00]
-; BDVER2-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:1.00]
-; BDVER2-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmaddps_256:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
-; HASWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
-; HASWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmaddps_256:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
-; BROADWELL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
-; BROADWELL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmaddps_256:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
-; SKYLAKE-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
-; SKYLAKE-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmaddps_256:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; KNL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
-; KNL-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
-; KNL-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmaddps_256:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50]
-; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50]
-; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmaddps_256:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmadd132ps $2, $1, $0 \0A\09 vfmadd213ps $2, $1, $0 \0A\09 vfmadd231ps $2, $1, $0 \0A\09 vfmadd132ps $3, $1, $0 \0A\09 vfmadd213ps $3, $1, $0 \0A\09 vfmadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddsd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; GENERIC-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmaddsd_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; BDVER2-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmaddsd_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; HASWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; HASWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmaddsd_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; BROADWELL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmaddsd_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
-; SKYLAKE-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
-; SKYLAKE-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmaddsd_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; KNL-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; KNL-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; KNL-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmaddsd_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
-; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
-; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
-; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmaddsd_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmadd132sd $2, $1, $0 \0A\09 vfmadd213sd $2, $1, $0 \0A\09 vfmadd231sd $2, $1, $0 \0A\09 vfmadd132sd $3, $1, $0 \0A\09 vfmadd213sd $3, $1, $0 \0A\09 vfmadd231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddss_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; GENERIC-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmaddss_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; BDVER2-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmaddss_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; HASWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; HASWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmaddss_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; BROADWELL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmaddss_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
-; SKYLAKE-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
-; SKYLAKE-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmaddss_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
-; KNL-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
-; KNL-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmaddss_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50]
-; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50]
-; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmaddss_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmadd132ss $2, $1, $0 \0A\09 vfmadd213ss $2, $1, $0 \0A\09 vfmadd231ss $2, $1, $0 \0A\09 vfmadd132ss $3, $1, $0 \0A\09 vfmadd213ss $3, $1, $0 \0A\09 vfmadd231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-;
-; VFMADDSUB
-;
-
-define void @test_vfmaddsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddsubpd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
-; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmaddsubpd_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
-; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmaddsubpd_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50]
-; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50]
-; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmaddsubpd_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
-; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmaddsubpd_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
-; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
-; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmaddsubpd_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
-; KNL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
-; KNL-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50]
-; KNL-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50]
-; KNL-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmaddsubpd_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
-; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
-; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
-; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
-; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmaddsubpd_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmaddsub132pd $2, $1, $0 \0A\09 vfmaddsub213pd $2, $1, $0 \0A\09 vfmaddsub231pd $2, $1, $0 \0A\09 vfmaddsub132pd $3, $1, $0 \0A\09 vfmaddsub213pd $3, $1, $0 \0A\09 vfmaddsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmaddsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddsubpd_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
-; GENERIC-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmaddsubpd_256:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:1.00]
-; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:1.00]
-; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmaddsubpd_256:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
-; HASWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
-; HASWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmaddsubpd_256:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
-; BROADWELL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
-; BROADWELL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmaddsubpd_256:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
-; SKYLAKE-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
-; SKYLAKE-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmaddsubpd_256:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
-; KNL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
-; KNL-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
-; KNL-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
-; KNL-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmaddsubpd_256:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
-; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
-; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
-; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
-; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
-; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmaddsubpd_256:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmaddsub132pd $2, $1, $0 \0A\09 vfmaddsub213pd $2, $1, $0 \0A\09 vfmaddsub231pd $2, $1, $0 \0A\09 vfmaddsub132pd $3, $1, $0 \0A\09 vfmaddsub213pd $3, $1, $0 \0A\09 vfmaddsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmaddsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddsubps_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
-; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmaddsubps_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
-; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmaddsubps_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50]
-; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50]
-; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmaddsubps_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
-; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmaddsubps_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
-; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
-; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmaddsubps_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
-; KNL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
-; KNL-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [11:0.50]
-; KNL-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [11:0.50]
-; KNL-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [11:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmaddsubps_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50]
-; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50]
-; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
-; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
-; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmaddsubps_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmaddsub132ps $2, $1, $0 \0A\09 vfmaddsub213ps $2, $1, $0 \0A\09 vfmaddsub231ps $2, $1, $0 \0A\09 vfmaddsub132ps $3, $1, $0 \0A\09 vfmaddsub213ps $3, $1, $0 \0A\09 vfmaddsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmaddsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddsubps_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
-; GENERIC-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmaddsubps_256:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:1.00]
-; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:1.00]
-; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmaddsubps_256:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
-; HASWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
-; HASWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmaddsubps_256:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
-; BROADWELL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
-; BROADWELL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmaddsubps_256:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
-; SKYLAKE-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
-; SKYLAKE-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmaddsubps_256:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
-; KNL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
-; KNL-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
-; KNL-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
-; KNL-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmaddsubps_256:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50]
-; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50]
-; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50]
-; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50]
-; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50]
-; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmaddsubps_256:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmaddsub132ps $2, $1, $0 \0A\09 vfmaddsub213ps $2, $1, $0 \0A\09 vfmaddsub231ps $2, $1, $0 \0A\09 vfmaddsub132ps $3, $1, $0 \0A\09 vfmaddsub213ps $3, $1, $0 \0A\09 vfmaddsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
- ret void
-}
-
-;
-; VFMSUBADD
-;
-
-define void @test_vfmsubaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubaddpd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
-; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmsubaddpd_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
-; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmsubaddpd_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50]
-; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50]
-; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmsubaddpd_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
-; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmsubaddpd_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
-; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
-; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmsubaddpd_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
-; KNL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
-; KNL-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50]
-; KNL-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50]
-; KNL-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmsubaddpd_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
-; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
-; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
-; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
-; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmsubaddpd_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmsubadd132pd $2, $1, $0 \0A\09 vfmsubadd213pd $2, $1, $0 \0A\09 vfmsubadd231pd $2, $1, $0 \0A\09 vfmsubadd132pd $3, $1, $0 \0A\09 vfmsubadd213pd $3, $1, $0 \0A\09 vfmsubadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmsubaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubaddpd_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
-; GENERIC-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmsubaddpd_256:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:1.00]
-; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:1.00]
-; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmsubaddpd_256:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
-; HASWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
-; HASWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmsubaddpd_256:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
-; BROADWELL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
-; BROADWELL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmsubaddpd_256:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
-; SKYLAKE-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
-; SKYLAKE-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmsubaddpd_256:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
-; KNL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
-; KNL-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
-; KNL-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
-; KNL-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmsubaddpd_256:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
-; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
-; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
-; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
-; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
-; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmsubaddpd_256:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmsubadd132pd $2, $1, $0 \0A\09 vfmsubadd213pd $2, $1, $0 \0A\09 vfmsubadd231pd $2, $1, $0 \0A\09 vfmsubadd132pd $3, $1, $0 \0A\09 vfmsubadd213pd $3, $1, $0 \0A\09 vfmsubadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmsubaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubaddps_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
-; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmsubaddps_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
-; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmsubaddps_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50]
-; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50]
-; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmsubaddps_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
-; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmsubaddps_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
-; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
-; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmsubaddps_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
-; KNL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
-; KNL-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [11:0.50]
-; KNL-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [11:0.50]
-; KNL-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [11:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmsubaddps_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50]
-; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50]
-; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
-; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
-; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmsubaddps_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmsubadd132ps $2, $1, $0 \0A\09 vfmsubadd213ps $2, $1, $0 \0A\09 vfmsubadd231ps $2, $1, $0 \0A\09 vfmsubadd132ps $3, $1, $0 \0A\09 vfmsubadd213ps $3, $1, $0 \0A\09 vfmsubadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmsubaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubaddps_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
-; GENERIC-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmsubaddps_256:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:1.00]
-; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:1.00]
-; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmsubaddps_256:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
-; HASWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
-; HASWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmsubaddps_256:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
-; BROADWELL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
-; BROADWELL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmsubaddps_256:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
-; SKYLAKE-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
-; SKYLAKE-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmsubaddps_256:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
-; KNL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
-; KNL-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
-; KNL-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
-; KNL-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmsubaddps_256:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50]
-; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50]
-; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50]
-; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50]
-; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50]
-; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmsubaddps_256:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmsubadd132ps $2, $1, $0 \0A\09 vfmsubadd213ps $2, $1, $0 \0A\09 vfmsubadd231ps $2, $1, $0 \0A\09 vfmsubadd132ps $3, $1, $0 \0A\09 vfmsubadd213ps $3, $1, $0 \0A\09 vfmsubadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
- ret void
-}
-
-;
-; VFMSUB
-;
-
-define void @test_vfmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubpd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; GENERIC-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmsubpd_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; BDVER2-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmsubpd_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50]
-; HASWELL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50]
-; HASWELL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmsubpd_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; BROADWELL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmsubpd_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmsubpd_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; KNL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; KNL-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50]
-; KNL-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50]
-; KNL-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmsubpd_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
-; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
-; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmsubpd_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmsub132pd $2, $1, $0 \0A\09 vfmsub213pd $2, $1, $0 \0A\09 vfmsub231pd $2, $1, $0 \0A\09 vfmsub132pd $3, $1, $0 \0A\09 vfmsub213pd $3, $1, $0 \0A\09 vfmsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubpd_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
-; GENERIC-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmsubpd_256:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:1.00]
-; BDVER2-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:1.00]
-; BDVER2-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmsubpd_256:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
-; HASWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
-; HASWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmsubpd_256:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
-; BROADWELL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
-; BROADWELL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmsubpd_256:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
-; SKYLAKE-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
-; SKYLAKE-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmsubpd_256:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; KNL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; KNL-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
-; KNL-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
-; KNL-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmsubpd_256:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
-; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
-; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
-; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
-; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
-; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmsubpd_256:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmsub132pd $2, $1, $0 \0A\09 vfmsub213pd $2, $1, $0 \0A\09 vfmsub231pd $2, $1, $0 \0A\09 vfmsub132pd $3, $1, $0 \0A\09 vfmsub213pd $3, $1, $0 \0A\09 vfmsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubps_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; GENERIC-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmsubps_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; BDVER2-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmsubps_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50]
-; HASWELL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50]
-; HASWELL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmsubps_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; BROADWELL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmsubps_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmsubps_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; KNL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; KNL-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [11:0.50]
-; KNL-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [11:0.50]
-; KNL-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [11:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmsubps_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
-; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
-; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmsubps_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmsub132ps $2, $1, $0 \0A\09 vfmsub213ps $2, $1, $0 \0A\09 vfmsub231ps $2, $1, $0 \0A\09 vfmsub132ps $3, $1, $0 \0A\09 vfmsub213ps $3, $1, $0 \0A\09 vfmsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubps_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
-; GENERIC-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmsubps_256:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:1.00]
-; BDVER2-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:1.00]
-; BDVER2-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmsubps_256:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
-; HASWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
-; HASWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmsubps_256:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
-; BROADWELL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
-; BROADWELL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmsubps_256:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
-; SKYLAKE-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
-; SKYLAKE-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmsubps_256:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; KNL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; KNL-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
-; KNL-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
-; KNL-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmsubps_256:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50]
-; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50]
-; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50]
-; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50]
-; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50]
-; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmsubps_256:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmsub132ps $2, $1, $0 \0A\09 vfmsub213ps $2, $1, $0 \0A\09 vfmsub231ps $2, $1, $0 \0A\09 vfmsub132ps $3, $1, $0 \0A\09 vfmsub213ps $3, $1, $0 \0A\09 vfmsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubsd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; GENERIC-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmsubsd_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; BDVER2-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmsubsd_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; HASWELL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; HASWELL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmsubsd_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; BROADWELL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmsubsd_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
-; SKYLAKE-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
-; SKYLAKE-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmsubsd_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; KNL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; KNL-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; KNL-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; KNL-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmsubsd_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
-; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
-; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
-; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
-; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmsubsd_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmsub132sd $2, $1, $0 \0A\09 vfmsub213sd $2, $1, $0 \0A\09 vfmsub231sd $2, $1, $0 \0A\09 vfmsub132sd $3, $1, $0 \0A\09 vfmsub213sd $3, $1, $0 \0A\09 vfmsub231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubss_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; GENERIC-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfmsubss_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; BDVER2-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfmsubss_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; HASWELL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; HASWELL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfmsubss_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; BROADWELL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfmsubss_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
-; SKYLAKE-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
-; SKYLAKE-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfmsubss_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; KNL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; KNL-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
-; KNL-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
-; KNL-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfmsubss_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50]
-; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50]
-; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50]
-; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50]
-; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfmsubss_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [12:0.50]
-; ZNVER1-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfmsub132ss $2, $1, $0 \0A\09 vfmsub213ss $2, $1, $0 \0A\09 vfmsub231ss $2, $1, $0 \0A\09 vfmsub132ss $3, $1, $0 \0A\09 vfmsub213ss $3, $1, $0 \0A\09 vfmsub231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-;
-; VFNMADD
-;
-
-define void @test_vfnmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmaddpd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; GENERIC-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfnmaddpd_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; BDVER2-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfnmaddpd_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50]
-; HASWELL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50]
-; HASWELL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfnmaddpd_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfnmaddpd_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfnmaddpd_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; KNL-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50]
-; KNL-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50]
-; KNL-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfnmaddpd_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
-; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfnmaddpd_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50]
-; ZNVER1-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfnmadd132pd $2, $1, $0 \0A\09 vfnmadd213pd $2, $1, $0 \0A\09 vfnmadd231pd $2, $1, $0 \0A\09 vfnmadd132pd $3, $1, $0 \0A\09 vfnmadd213pd $3, $1, $0 \0A\09 vfnmadd231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmaddpd_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50]
-; GENERIC-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
-; GENERIC-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfnmaddpd_256:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:1.00]
-; BDVER2-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:1.00]
-; BDVER2-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfnmaddpd_256:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
-; HASWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
-; HASWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfnmaddpd_256:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
-; BROADWELL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
-; BROADWELL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfnmaddpd_256:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
-; SKYLAKE-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
-; SKYLAKE-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfnmaddpd_256:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; KNL-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
-; KNL-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
-; KNL-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfnmaddpd_256:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
-; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
-; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
-; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfnmaddpd_256:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
-; ZNVER1-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfnmadd132pd $2, $1, $0 \0A\09 vfnmadd213pd $2, $1, $0 \0A\09 vfnmadd231pd $2, $1, $0 \0A\09 vfnmadd132pd $3, $1, $0 \0A\09 vfnmadd213pd $3, $1, $0 \0A\09 vfnmadd231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmaddps_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; GENERIC-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfnmaddps_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; BDVER2-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfnmaddps_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50]
-; HASWELL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfnmaddps_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfnmaddps_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfnmaddps_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; KNL-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [11:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [11:0.50]
-; KNL-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [11:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfnmaddps_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
-; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfnmaddps_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50]
-; ZNVER1-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfnmadd132ps $2, $1, $0 \0A\09 vfnmadd213ps $2, $1, $0 \0A\09 vfnmadd231ps $2, $1, $0 \0A\09 vfnmadd132ps $3, $1, $0 \0A\09 vfnmadd213ps $3, $1, $0 \0A\09 vfnmadd231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmaddps_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50]
-; GENERIC-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
-; GENERIC-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfnmaddps_256:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:1.00]
-; BDVER2-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:1.00]
-; BDVER2-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfnmaddps_256:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
-; HASWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfnmaddps_256:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
-; BROADWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
-; BROADWELL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfnmaddps_256:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
-; SKYLAKE-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
-; SKYLAKE-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfnmaddps_256:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; KNL-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
-; KNL-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfnmaddps_256:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50]
-; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50]
-; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfnmaddps_256:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [12:0.50]
-; ZNVER1-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfnmadd132ps $2, $1, $0 \0A\09 vfnmadd213ps $2, $1, $0 \0A\09 vfnmadd231ps $2, $1, $0 \0A\09 vfnmadd132ps $3, $1, $0 \0A\09 vfnmadd213ps $3, $1, $0 \0A\09 vfnmadd231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmaddsd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; GENERIC-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfnmaddsd_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; BDVER2-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfnmaddsd_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; HASWELL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; HASWELL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfnmaddsd_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; BROADWELL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfnmaddsd_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
-; SKYLAKE-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; SKYLAKE-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfnmaddsd_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; KNL-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; KNL-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; KNL-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfnmaddsd_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
-; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
-; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfnmaddsd_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50]
-; ZNVER1-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfnmadd132sd $2, $1, $0 \0A\09 vfnmadd213sd $2, $1, $0 \0A\09 vfnmadd231sd $2, $1, $0 \0A\09 vfnmadd132sd $3, $1, $0 \0A\09 vfnmadd213sd $3, $1, $0 \0A\09 vfnmadd231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmaddss_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; GENERIC-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfnmaddss_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; BDVER2-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfnmaddss_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; HASWELL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfnmaddss_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; BROADWELL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfnmaddss_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
-; SKYLAKE-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; SKYLAKE-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfnmaddss_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; KNL-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
-; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; KNL-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfnmaddss_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50]
-; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfnmaddss_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [12:0.50]
-; ZNVER1-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfnmadd132ss $2, $1, $0 \0A\09 vfnmadd213ss $2, $1, $0 \0A\09 vfnmadd231ss $2, $1, $0 \0A\09 vfnmadd132ss $3, $1, $0 \0A\09 vfnmadd213ss $3, $1, $0 \0A\09 vfnmadd231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-;
-; VFNMSUB
-;
-
-define void @test_vfnmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmsubpd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; GENERIC-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfnmsubpd_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; BDVER2-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfnmsubpd_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50]
-; HASWELL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50]
-; HASWELL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfnmsubpd_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfnmsubpd_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfnmsubpd_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; KNL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; KNL-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50]
-; KNL-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50]
-; KNL-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfnmsubpd_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
-; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
-; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfnmsubpd_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50]
-; ZNVER1-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfnmsub132pd $2, $1, $0 \0A\09 vfnmsub213pd $2, $1, $0 \0A\09 vfnmsub231pd $2, $1, $0 \0A\09 vfnmsub132pd $3, $1, $0 \0A\09 vfnmsub213pd $3, $1, $0 \0A\09 vfnmsub231pd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmsubpd_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50]
-; GENERIC-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
-; GENERIC-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfnmsubpd_256:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:1.00]
-; BDVER2-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:1.00]
-; BDVER2-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfnmsubpd_256:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
-; HASWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
-; HASWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfnmsubpd_256:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
-; BROADWELL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
-; BROADWELL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfnmsubpd_256:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
-; SKYLAKE-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
-; SKYLAKE-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfnmsubpd_256:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; KNL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; KNL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; KNL-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
-; KNL-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
-; KNL-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfnmsubpd_256:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
-; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
-; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
-; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
-; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
-; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfnmsubpd_256:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
-; ZNVER1-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfnmsub132pd $2, $1, $0 \0A\09 vfnmsub213pd $2, $1, $0 \0A\09 vfnmsub231pd $2, $1, $0 \0A\09 vfnmsub132pd $3, $1, $0 \0A\09 vfnmsub213pd $3, $1, $0 \0A\09 vfnmsub231pd $3, $1, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmsubps_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; GENERIC-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfnmsubps_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; BDVER2-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfnmsubps_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50]
-; HASWELL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50]
-; HASWELL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfnmsubps_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfnmsubps_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfnmsubps_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; KNL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; KNL-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [11:0.50]
-; KNL-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [11:0.50]
-; KNL-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [11:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfnmsubps_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
-; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
-; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfnmsubps_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50]
-; ZNVER1-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfnmsub132ps $2, $1, $0 \0A\09 vfnmsub213ps $2, $1, $0 \0A\09 vfnmsub231ps $2, $1, $0 \0A\09 vfnmsub132ps $3, $1, $0 \0A\09 vfnmsub213ps $3, $1, $0 \0A\09 vfnmsub231ps $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmsubps_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50]
-; GENERIC-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
-; GENERIC-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfnmsubps_256:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:1.00]
-; BDVER2-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:1.00]
-; BDVER2-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfnmsubps_256:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
-; HASWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
-; HASWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: vzeroupper # sched: [0:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfnmsubps_256:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
-; BROADWELL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
-; BROADWELL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: vzeroupper # sched: [0:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfnmsubps_256:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
-; SKYLAKE-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
-; SKYLAKE-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: vzeroupper # sched: [0:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfnmsubps_256:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; KNL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; KNL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; KNL-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
-; KNL-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
-; KNL-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfnmsubps_256:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50]
-; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50]
-; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50]
-; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50]
-; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50]
-; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: vzeroupper # sched: [0:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfnmsubps_256:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [12:0.50]
-; ZNVER1-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: vzeroupper # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfnmsub132ps $2, $1, $0 \0A\09 vfnmsub213ps $2, $1, $0 \0A\09 vfnmsub231ps $2, $1, $0 \0A\09 vfnmsub132ps $3, $1, $0 \0A\09 vfnmsub213ps $3, $1, $0 \0A\09 vfnmsub231ps $3, $1, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmsubsd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; GENERIC-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfnmsubsd_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; BDVER2-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfnmsubsd_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; HASWELL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; HASWELL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfnmsubsd_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; BROADWELL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfnmsubsd_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
-; SKYLAKE-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
-; SKYLAKE-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfnmsubsd_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; KNL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; KNL-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; KNL-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; KNL-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfnmsubsd_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
-; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
-; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
-; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
-; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfnmsubsd_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50]
-; ZNVER1-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfnmsub132sd $2, $1, $0 \0A\09 vfnmsub213sd $2, $1, $0 \0A\09 vfnmsub231sd $2, $1, $0 \0A\09 vfnmsub132sd $3, $1, $0 \0A\09 vfnmsub213sd $3, $1, $0 \0A\09 vfnmsub231sd $3, $1, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmsubss_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; GENERIC-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; GENERIC-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; GENERIC-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_vfnmsubss_128:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; BDVER2-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; BDVER2-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; BDVER2-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; HASWELL-LABEL: test_vfnmsubss_128:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; HASWELL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; HASWELL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; HASWELL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_vfnmsubss_128:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; BROADWELL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; BROADWELL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; BROADWELL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_vfnmsubss_128:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
-; SKYLAKE-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
-; SKYLAKE-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
-; SKYLAKE-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_vfnmsubss_128:
-; KNL: # %bb.0:
-; KNL-NEXT: #APP
-; KNL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; KNL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; KNL-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
-; KNL-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
-; KNL-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
-; KNL-NEXT: #NO_APP
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_vfnmsubss_128:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50]
-; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50]
-; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50]
-; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50]
-; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_vfnmsubss_128:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
-; ZNVER1-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [12:0.50]
-; ZNVER1-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [12:0.50]
-; ZNVER1-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "vfnmsub132ss $2, $1, $0 \0A\09 vfnmsub213ss $2, $1, $0 \0A\09 vfnmsub231ss $2, $1, $0 \0A\09 vfnmsub132ss $3, $1, $0 \0A\09 vfnmsub213ss $3, $1, $0 \0A\09 vfnmsub231ss $3, $1, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
diff --git a/llvm/test/CodeGen/X86/fma4-schedule.ll b/llvm/test/CodeGen/X86/fma4-schedule.ll
deleted file mode 100644
index 118ad2b10be..00000000000
--- a/llvm/test/CodeGen/X86/fma4-schedule.ll
+++ /dev/null
@@ -1,1058 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma4 | FileCheck %s --check-prefixes=CHECK,GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER12,BDVER1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-fma | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER12,BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 -mattr=-fma | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER34,BDVER3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 -mattr=-fma | FileCheck %s --check-prefixes=CHECK,BDVER,BDVER34,BDVER4
-
-;
-; VFMADD
-;
-
-define void @test_vfmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddpd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmaddpd_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmaddpd_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmaddpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmaddpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfmaddpd $2, $1, $0, $0 \0A\09 vfmaddpd $3, $1, $0, $0 \0A\09 vfmaddpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddpd_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmaddpd_256:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmaddpd_256:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfmaddpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfmaddpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: vzeroupper
-; BDVER34-NEXT: retq
- tail call void asm "vfmaddpd $2, $1, $0, $0 \0A\09 vfmaddpd $3, $1, $0, $0 \0A\09 vfmaddpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddps_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmaddps_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmaddps_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmaddps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmaddps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfmaddps $2, $1, $0, $0 \0A\09 vfmaddps $3, $1, $0, $0 \0A\09 vfmaddps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddps_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmaddps_256:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmaddps_256:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmaddps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfmaddps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfmaddps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: vzeroupper
-; BDVER34-NEXT: retq
- tail call void asm "vfmaddps $2, $1, $0, $0 \0A\09 vfmaddps $3, $1, $0, $0 \0A\09 vfmaddps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddsd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmaddsd_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmaddsd_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfmaddsd $2, $1, $0, $0 \0A\09 vfmaddsd $3, $1, $0, $0 \0A\09 vfmaddsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddss_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmaddss_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmaddss_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmaddss (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmaddss %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfmaddss $2, $1, $0, $0 \0A\09 vfmaddss $3, $1, $0, $0 \0A\09 vfmaddss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-;
-; VFMADDSUB
-;
-
-define void @test_vfmaddsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddsubpd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmaddsubpd_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmaddsubpd_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmaddsubpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmaddsubpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfmaddsubpd $2, $1, $0, $0 \0A\09 vfmaddsubpd $3, $1, $0, $0 \0A\09 vfmaddsubpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmaddsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddsubpd_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmaddsubpd_256:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmaddsubpd_256:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfmaddsubpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfmaddsubpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: vzeroupper
-; BDVER34-NEXT: retq
- tail call void asm "vfmaddsubpd $2, $1, $0, $0 \0A\09 vfmaddsubpd $3, $1, $0, $0 \0A\09 vfmaddsubpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmaddsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddsubps_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmaddsubps_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmaddsubps_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmaddsubps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmaddsubps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfmaddsubps $2, $1, $0, $0 \0A\09 vfmaddsubps $3, $1, $0, $0 \0A\09 vfmaddsubps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmaddsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmaddsubps_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmaddsubps_256:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmaddsubps_256:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfmaddsubps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfmaddsubps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: vzeroupper
-; BDVER34-NEXT: retq
- tail call void asm "vfmaddsubps $2, $1, $0, $0 \0A\09 vfmaddsubps $3, $1, $0, $0 \0A\09 vfmaddsubps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
- ret void
-}
-
-;
-; VFMSUBADD
-;
-
-define void @test_vfmsubaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubaddpd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmsubaddpd_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmsubaddpd_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmsubaddpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmsubaddpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfmsubaddpd $2, $1, $0, $0 \0A\09 vfmsubaddpd $3, $1, $0, $0 \0A\09 vfmsubaddpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmsubaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubaddpd_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmsubaddpd_256:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmsubaddpd_256:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfmsubaddpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfmsubaddpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: vzeroupper
-; BDVER34-NEXT: retq
- tail call void asm "vfmsubaddpd $2, $1, $0, $0 \0A\09 vfmsubaddpd $3, $1, $0, $0 \0A\09 vfmsubaddpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmsubaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubaddps_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmsubaddps_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmsubaddps_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmsubaddps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmsubaddps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfmsubaddps $2, $1, $0, $0 \0A\09 vfmsubaddps $3, $1, $0, $0 \0A\09 vfmsubaddps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmsubaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubaddps_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmsubaddps_256:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmsubaddps_256:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfmsubaddps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfmsubaddps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: vzeroupper
-; BDVER34-NEXT: retq
- tail call void asm "vfmsubaddps $2, $1, $0, $0 \0A\09 vfmsubaddps $3, $1, $0, $0 \0A\09 vfmsubaddps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
- ret void
-}
-
-;
-; VFMSUB
-;
-
-define void @test_vfmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubpd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmsubpd_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmsubpd_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmsubpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmsubpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfmsubpd $2, $1, $0, $0 \0A\09 vfmsubpd $3, $1, $0, $0 \0A\09 vfmsubpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubpd_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmsubpd_256:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmsubpd_256:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfmsubpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfmsubpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: vzeroupper
-; BDVER34-NEXT: retq
- tail call void asm "vfmsubpd $2, $1, $0, $0 \0A\09 vfmsubpd $3, $1, $0, $0 \0A\09 vfmsubpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubps_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmsubps_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmsubps_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmsubps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmsubps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmsubps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfmsubps $2, $1, $0, $0 \0A\09 vfmsubps $3, $1, $0, $0 \0A\09 vfmsubps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubps_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmsubps_256:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: vfmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmsubps_256:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmsubps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfmsubps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfmsubps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: vzeroupper
-; BDVER34-NEXT: retq
- tail call void asm "vfmsubps $2, $1, $0, $0 \0A\09 vfmsubps $3, $1, $0, $0 \0A\09 vfmsubps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubsd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmsubsd_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmsubsd_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmsubsd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmsubsd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfmsubsd $2, $1, $0, $0 \0A\09 vfmsubsd $3, $1, $0, $0 \0A\09 vfmsubsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfmsubss_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfmsubss_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfmsubss_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmsubss (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfmsubss %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfmsubss $2, $1, $0, $0 \0A\09 vfmsubss $3, $1, $0, $0 \0A\09 vfmsubss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-;
-; VFNMADD
-;
-
-define void @test_vfnmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmaddpd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfnmaddpd_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfnmaddpd_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfnmaddpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfnmaddpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfnmaddpd $2, $1, $0, $0 \0A\09 vfnmaddpd $3, $1, $0, $0 \0A\09 vfnmaddpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmaddpd_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfnmaddpd_256:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfnmaddpd_256:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfnmaddpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfnmaddpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: vzeroupper
-; BDVER34-NEXT: retq
- tail call void asm "vfnmaddpd $2, $1, $0, $0 \0A\09 vfnmaddpd $3, $1, $0, $0 \0A\09 vfnmaddpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmaddps_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfnmaddps_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfnmaddps_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfnmaddps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfnmaddps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfnmaddps $2, $1, $0, $0 \0A\09 vfnmaddps $3, $1, $0, $0 \0A\09 vfnmaddps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmaddps_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfnmaddps_256:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfnmaddps_256:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfnmaddps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfnmaddps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: vzeroupper
-; BDVER34-NEXT: retq
- tail call void asm "vfnmaddps $2, $1, $0, $0 \0A\09 vfnmaddps $3, $1, $0, $0 \0A\09 vfnmaddps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmaddsd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfnmaddsd_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfnmaddsd_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfnmaddsd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfnmaddsd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfnmaddsd $2, $1, $0, $0 \0A\09 vfnmaddsd $3, $1, $0, $0 \0A\09 vfnmaddsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmaddss_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfnmaddss_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfnmaddss_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfnmaddss (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfnmaddss %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfnmaddss $2, $1, $0, $0 \0A\09 vfnmaddss $3, $1, $0, $0 \0A\09 vfnmaddss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-;
-; VFNMSUB
-;
-
-define void @test_vfnmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmsubpd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfnmsubpd_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfnmsubpd_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfnmsubpd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfnmsubpd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfnmsubpd $2, $1, $0, $0 \0A\09 vfnmsubpd $3, $1, $0, $0 \0A\09 vfnmsubpd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmsubpd_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfnmsubpd_256:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfnmsubpd_256:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfnmsubpd (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfnmsubpd %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: vzeroupper
-; BDVER34-NEXT: retq
- tail call void asm "vfnmsubpd $2, $1, $0, $0 \0A\09 vfnmsubpd $3, $1, $0, $0 \0A\09 vfnmsubpd $1, $3, $0, $0", "x,x,x,*m"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmsubps_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfnmsubps_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfnmsubps_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfnmsubps (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfnmsubps %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfnmsubps $2, $1, $0, $0 \0A\09 vfnmsubps $3, $1, $0, $0 \0A\09 vfnmsubps $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmsubps_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfnmsubps_256:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfnmsubps_256:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfnmsubps (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER34-NEXT: vfnmsubps %ymm1, (%rdi), %ymm0, %ymm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: vzeroupper
-; BDVER34-NEXT: retq
- tail call void asm "vfnmsubps $2, $1, $0, $0 \0A\09 vfnmsubps $3, $1, $0, $0 \0A\09 vfnmsubps $1, $3, $0, $0", "x,x,x,*m"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmsubsd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfnmsubsd_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfnmsubsd_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfnmsubsd (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfnmsubsd %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfnmsubsd $2, $1, $0, $0 \0A\09 vfnmsubsd $3, $1, $0, $0 \0A\09 vfnmsubsd $1, $3, $0, $0", "x,x,x,*m"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) nounwind
- ret void
-}
-
-define void @test_vfnmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) optsize {
-; GENERIC-LABEL: test_vfnmsubss_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; GENERIC-NEXT: vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfnmsubss_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER12-NEXT: vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER34-LABEL: test_vfnmsubss_128:
-; BDVER34: # %bb.0:
-; BDVER34-NEXT: #APP
-; BDVER34-NEXT: vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfnmsubss (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER34-NEXT: vfnmsubss %xmm1, (%rdi), %xmm0, %xmm0
-; BDVER34-NEXT: #NO_APP
-; BDVER34-NEXT: retq
- tail call void asm "vfnmsubss $2, $1, $0, $0 \0A\09 vfnmsubss $3, $1, $0, $0 \0A\09 vfnmsubss $1, $3, $0, $0", "x,x,x,*m"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) nounwind
- ret void
-}
diff --git a/llvm/test/CodeGen/X86/fsgsbase-schedule.ll b/llvm/test/CodeGen/X86/fsgsbase-schedule.ll
deleted file mode 100644
index 653df60f802..00000000000
--- a/llvm/test/CodeGen/X86/fsgsbase-schedule.ll
+++ /dev/null
@@ -1,411 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=fsgsbase | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=GLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=IVY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=ZNVER1
-
-define i32 @test_x86_rdfsbase_32() {
-; GENERIC-LABEL: test_x86_rdfsbase_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rdfsbasel %eax # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GLM-LABEL: test_x86_rdfsbase_32:
-; GLM: # %bb.0:
-; GLM-NEXT: rdfsbasel %eax # sched: [100:1.00]
-; GLM-NEXT: retq # sched: [4:1.00]
-;
-; IVY-LABEL: test_x86_rdfsbase_32:
-; IVY: # %bb.0:
-; IVY-NEXT: rdfsbasel %eax # sched: [100:0.33]
-; IVY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_x86_rdfsbase_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: rdfsbasel %eax # sched: [100:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_x86_rdfsbase_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: rdfsbasel %eax # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_x86_rdfsbase_32:
-; SKX: # %bb.0:
-; SKX-NEXT: rdfsbasel %eax # sched: [100:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_x86_rdfsbase_32:
-; KNL: # %bb.0:
-; KNL-NEXT: rdfsbasel %eax # sched: [100:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; BDVER-LABEL: test_x86_rdfsbase_32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: rdfsbasel %eax
-; BDVER-NEXT: retq
-;
-; ZNVER1-LABEL: test_x86_rdfsbase_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: rdfsbasel %eax # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %res = call i32 @llvm.x86.rdfsbase.32()
- ret i32 %res
-}
-declare i32 @llvm.x86.rdfsbase.32() nounwind readnone
-
-define i32 @test_x86_rdgsbase_32() {
-; GENERIC-LABEL: test_x86_rdgsbase_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rdgsbasel %eax # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GLM-LABEL: test_x86_rdgsbase_32:
-; GLM: # %bb.0:
-; GLM-NEXT: rdgsbasel %eax # sched: [100:1.00]
-; GLM-NEXT: retq # sched: [4:1.00]
-;
-; IVY-LABEL: test_x86_rdgsbase_32:
-; IVY: # %bb.0:
-; IVY-NEXT: rdgsbasel %eax # sched: [100:0.33]
-; IVY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_x86_rdgsbase_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: rdgsbasel %eax # sched: [100:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_x86_rdgsbase_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: rdgsbasel %eax # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_x86_rdgsbase_32:
-; SKX: # %bb.0:
-; SKX-NEXT: rdgsbasel %eax # sched: [100:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_x86_rdgsbase_32:
-; KNL: # %bb.0:
-; KNL-NEXT: rdgsbasel %eax # sched: [100:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; BDVER-LABEL: test_x86_rdgsbase_32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: rdgsbasel %eax
-; BDVER-NEXT: retq
-;
-; ZNVER1-LABEL: test_x86_rdgsbase_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: rdgsbasel %eax # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %res = call i32 @llvm.x86.rdgsbase.32()
- ret i32 %res
-}
-declare i32 @llvm.x86.rdgsbase.32() nounwind readnone
-
-define i64 @test_x86_rdfsbase_64() {
-; GENERIC-LABEL: test_x86_rdfsbase_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rdfsbaseq %rax # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GLM-LABEL: test_x86_rdfsbase_64:
-; GLM: # %bb.0:
-; GLM-NEXT: rdfsbaseq %rax # sched: [100:1.00]
-; GLM-NEXT: retq # sched: [4:1.00]
-;
-; IVY-LABEL: test_x86_rdfsbase_64:
-; IVY: # %bb.0:
-; IVY-NEXT: rdfsbaseq %rax # sched: [100:0.33]
-; IVY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_x86_rdfsbase_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: rdfsbaseq %rax # sched: [100:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_x86_rdfsbase_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: rdfsbaseq %rax # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_x86_rdfsbase_64:
-; SKX: # %bb.0:
-; SKX-NEXT: rdfsbaseq %rax # sched: [100:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_x86_rdfsbase_64:
-; KNL: # %bb.0:
-; KNL-NEXT: rdfsbaseq %rax # sched: [100:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; BDVER-LABEL: test_x86_rdfsbase_64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: rdfsbaseq %rax
-; BDVER-NEXT: retq
-;
-; ZNVER1-LABEL: test_x86_rdfsbase_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: rdfsbaseq %rax # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %res = call i64 @llvm.x86.rdfsbase.64()
- ret i64 %res
-}
-declare i64 @llvm.x86.rdfsbase.64() nounwind readnone
-
-define i64 @test_x86_rdgsbase_64() {
-; GENERIC-LABEL: test_x86_rdgsbase_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rdgsbaseq %rax # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GLM-LABEL: test_x86_rdgsbase_64:
-; GLM: # %bb.0:
-; GLM-NEXT: rdgsbaseq %rax # sched: [100:1.00]
-; GLM-NEXT: retq # sched: [4:1.00]
-;
-; IVY-LABEL: test_x86_rdgsbase_64:
-; IVY: # %bb.0:
-; IVY-NEXT: rdgsbaseq %rax # sched: [100:0.33]
-; IVY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_x86_rdgsbase_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: rdgsbaseq %rax # sched: [100:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_x86_rdgsbase_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: rdgsbaseq %rax # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_x86_rdgsbase_64:
-; SKX: # %bb.0:
-; SKX-NEXT: rdgsbaseq %rax # sched: [100:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_x86_rdgsbase_64:
-; KNL: # %bb.0:
-; KNL-NEXT: rdgsbaseq %rax # sched: [100:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; BDVER-LABEL: test_x86_rdgsbase_64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: rdgsbaseq %rax
-; BDVER-NEXT: retq
-;
-; ZNVER1-LABEL: test_x86_rdgsbase_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: rdgsbaseq %rax # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %res = call i64 @llvm.x86.rdgsbase.64()
- ret i64 %res
-}
-declare i64 @llvm.x86.rdgsbase.64() nounwind readnone
-
-define void @test_x86_wrfsbase_32(i32 %x) {
-; GENERIC-LABEL: test_x86_wrfsbase_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: wrfsbasel %edi # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GLM-LABEL: test_x86_wrfsbase_32:
-; GLM: # %bb.0:
-; GLM-NEXT: wrfsbasel %edi # sched: [100:1.00]
-; GLM-NEXT: retq # sched: [4:1.00]
-;
-; IVY-LABEL: test_x86_wrfsbase_32:
-; IVY: # %bb.0:
-; IVY-NEXT: wrfsbasel %edi # sched: [100:0.33]
-; IVY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_x86_wrfsbase_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: wrfsbasel %edi # sched: [100:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_x86_wrfsbase_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: wrfsbasel %edi # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_x86_wrfsbase_32:
-; SKX: # %bb.0:
-; SKX-NEXT: wrfsbasel %edi # sched: [100:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_x86_wrfsbase_32:
-; KNL: # %bb.0:
-; KNL-NEXT: wrfsbasel %edi # sched: [100:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; BDVER-LABEL: test_x86_wrfsbase_32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: wrfsbasel %edi
-; BDVER-NEXT: retq
-;
-; ZNVER1-LABEL: test_x86_wrfsbase_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: wrfsbasel %edi # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void @llvm.x86.wrfsbase.32(i32 %x)
- ret void
-}
-declare void @llvm.x86.wrfsbase.32(i32) nounwind readnone
-
-define void @test_x86_wrgsbase_32(i32 %x) {
-; GENERIC-LABEL: test_x86_wrgsbase_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: wrgsbasel %edi # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GLM-LABEL: test_x86_wrgsbase_32:
-; GLM: # %bb.0:
-; GLM-NEXT: wrgsbasel %edi # sched: [100:1.00]
-; GLM-NEXT: retq # sched: [4:1.00]
-;
-; IVY-LABEL: test_x86_wrgsbase_32:
-; IVY: # %bb.0:
-; IVY-NEXT: wrgsbasel %edi # sched: [100:0.33]
-; IVY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_x86_wrgsbase_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: wrgsbasel %edi # sched: [100:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_x86_wrgsbase_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: wrgsbasel %edi # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_x86_wrgsbase_32:
-; SKX: # %bb.0:
-; SKX-NEXT: wrgsbasel %edi # sched: [100:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_x86_wrgsbase_32:
-; KNL: # %bb.0:
-; KNL-NEXT: wrgsbasel %edi # sched: [100:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; BDVER-LABEL: test_x86_wrgsbase_32:
-; BDVER: # %bb.0:
-; BDVER-NEXT: wrgsbasel %edi
-; BDVER-NEXT: retq
-;
-; ZNVER1-LABEL: test_x86_wrgsbase_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: wrgsbasel %edi # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void @llvm.x86.wrgsbase.32(i32 %x)
- ret void
-}
-declare void @llvm.x86.wrgsbase.32(i32) nounwind readnone
-
-define void @test_x86_wrfsbase_64(i64 %x) {
-; GENERIC-LABEL: test_x86_wrfsbase_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: wrfsbaseq %rdi # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GLM-LABEL: test_x86_wrfsbase_64:
-; GLM: # %bb.0:
-; GLM-NEXT: wrfsbaseq %rdi # sched: [100:1.00]
-; GLM-NEXT: retq # sched: [4:1.00]
-;
-; IVY-LABEL: test_x86_wrfsbase_64:
-; IVY: # %bb.0:
-; IVY-NEXT: wrfsbaseq %rdi # sched: [100:0.33]
-; IVY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_x86_wrfsbase_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: wrfsbaseq %rdi # sched: [100:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_x86_wrfsbase_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: wrfsbaseq %rdi # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_x86_wrfsbase_64:
-; SKX: # %bb.0:
-; SKX-NEXT: wrfsbaseq %rdi # sched: [100:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_x86_wrfsbase_64:
-; KNL: # %bb.0:
-; KNL-NEXT: wrfsbaseq %rdi # sched: [100:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; BDVER-LABEL: test_x86_wrfsbase_64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: wrfsbaseq %rdi
-; BDVER-NEXT: retq
-;
-; ZNVER1-LABEL: test_x86_wrfsbase_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: wrfsbaseq %rdi # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void @llvm.x86.wrfsbase.64(i64 %x)
- ret void
-}
-declare void @llvm.x86.wrfsbase.64(i64) nounwind readnone
-
-define void @test_x86_wrgsbase_64(i64 %x) {
-; GENERIC-LABEL: test_x86_wrgsbase_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: wrgsbaseq %rdi # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GLM-LABEL: test_x86_wrgsbase_64:
-; GLM: # %bb.0:
-; GLM-NEXT: wrgsbaseq %rdi # sched: [100:1.00]
-; GLM-NEXT: retq # sched: [4:1.00]
-;
-; IVY-LABEL: test_x86_wrgsbase_64:
-; IVY: # %bb.0:
-; IVY-NEXT: wrgsbaseq %rdi # sched: [100:0.33]
-; IVY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_x86_wrgsbase_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: wrgsbaseq %rdi # sched: [100:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_x86_wrgsbase_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: wrgsbaseq %rdi # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_x86_wrgsbase_64:
-; SKX: # %bb.0:
-; SKX-NEXT: wrgsbaseq %rdi # sched: [100:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: test_x86_wrgsbase_64:
-; KNL: # %bb.0:
-; KNL-NEXT: wrgsbaseq %rdi # sched: [100:0.25]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; BDVER-LABEL: test_x86_wrgsbase_64:
-; BDVER: # %bb.0:
-; BDVER-NEXT: wrgsbaseq %rdi
-; BDVER-NEXT: retq
-;
-; ZNVER1-LABEL: test_x86_wrgsbase_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: wrgsbaseq %rdi # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void @llvm.x86.wrgsbase.64(i64 %x)
- ret void
-}
-declare void @llvm.x86.wrgsbase.64(i64) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/lea32-schedule.ll b/llvm/test/CodeGen/X86/lea32-schedule.ll
deleted file mode 100644
index 1e8ebfb766b..00000000000
--- a/llvm/test/CodeGen/X86/lea32-schedule.ll
+++ /dev/null
@@ -1,898 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define i32 @test_lea_offset(i32) {
-; GENERIC-LABEL: test_lea_offset:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi
-; GENERIC-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_offset:
-; ATOM: # %bb.0:
-; ATOM-NEXT: # kill: def $edi killed $edi def $rdi
-; ATOM-NEXT: leal -24(%rdi), %eax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_offset:
-; SLM: # %bb.0:
-; SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NEXT: leal -24(%rdi), %eax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_offset:
-; SANDY: # %bb.0:
-; SANDY-NEXT: # kill: def $edi killed $edi def $rdi
-; SANDY-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_offset:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; HASWELL-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_offset:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; BROADWELL-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_offset:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi
-; SKYLAKE-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_offset:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BDVER2-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_offset:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BTVER2-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_offset:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi
-; ZNVER1-NEXT: leal -24(%rdi), %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %2 = add nsw i32 %0, -24
- ret i32 %2
-}
-
-define i32 @test_lea_offset_big(i32) {
-; GENERIC-LABEL: test_lea_offset_big:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi
-; GENERIC-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_offset_big:
-; ATOM: # %bb.0:
-; ATOM-NEXT: # kill: def $edi killed $edi def $rdi
-; ATOM-NEXT: leal 1024(%rdi), %eax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_offset_big:
-; SLM: # %bb.0:
-; SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NEXT: leal 1024(%rdi), %eax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_offset_big:
-; SANDY: # %bb.0:
-; SANDY-NEXT: # kill: def $edi killed $edi def $rdi
-; SANDY-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_offset_big:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; HASWELL-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_offset_big:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; BROADWELL-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_offset_big:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi
-; SKYLAKE-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_offset_big:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BDVER2-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_offset_big:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BTVER2-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_offset_big:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi
-; ZNVER1-NEXT: leal 1024(%rdi), %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %2 = add nsw i32 %0, 1024
- ret i32 %2
-}
-
-; Function Attrs: norecurse nounwind readnone uwtable
-define i32 @test_lea_add(i32, i32) {
-; GENERIC-LABEL: test_lea_add:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: # kill: def $esi killed $esi def $rsi
-; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi
-; GENERIC-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_add:
-; ATOM: # %bb.0:
-; ATOM-NEXT: # kill: def $esi killed $esi def $rsi
-; ATOM-NEXT: # kill: def $edi killed $edi def $rdi
-; ATOM-NEXT: leal (%rdi,%rsi), %eax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_add:
-; SLM: # %bb.0:
-; SLM-NEXT: # kill: def $esi killed $esi def $rsi
-; SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NEXT: leal (%rdi,%rsi), %eax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_add:
-; SANDY: # %bb.0:
-; SANDY-NEXT: # kill: def $esi killed $esi def $rsi
-; SANDY-NEXT: # kill: def $edi killed $edi def $rdi
-; SANDY-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_add:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: # kill: def $esi killed $esi def $rsi
-; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_add:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: # kill: def $esi killed $esi def $rsi
-; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; BROADWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_add:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: # kill: def $esi killed $esi def $rsi
-; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi
-; SKYLAKE-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_add:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
-; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BDVER2-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_add:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
-; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BTVER2-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_add:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: # kill: def $esi killed $esi def $rsi
-; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi
-; ZNVER1-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %3 = add nsw i32 %1, %0
- ret i32 %3
-}
-
-define i32 @test_lea_add_offset(i32, i32) {
-; GENERIC-LABEL: test_lea_add_offset:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: # kill: def $esi killed $esi def $rsi
-; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi
-; GENERIC-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; GENERIC-NEXT: addl $16, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_add_offset:
-; ATOM: # %bb.0:
-; ATOM-NEXT: # kill: def $esi killed $esi def $rsi
-; ATOM-NEXT: # kill: def $edi killed $edi def $rdi
-; ATOM-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_add_offset:
-; SLM: # %bb.0:
-; SLM-NEXT: # kill: def $esi killed $esi def $rsi
-; SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_add_offset:
-; SANDY: # %bb.0:
-; SANDY-NEXT: # kill: def $esi killed $esi def $rsi
-; SANDY-NEXT: # kill: def $edi killed $edi def $rdi
-; SANDY-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; SANDY-NEXT: addl $16, %eax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_add_offset:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: # kill: def $esi killed $esi def $rsi
-; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; HASWELL-NEXT: addl $16, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_add_offset:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: # kill: def $esi killed $esi def $rsi
-; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; BROADWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; BROADWELL-NEXT: addl $16, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_add_offset:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: # kill: def $esi killed $esi def $rsi
-; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi
-; SKYLAKE-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: addl $16, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_add_offset:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
-; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BDVER2-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_add_offset:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
-; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BTVER2-NEXT: leal 16(%rdi,%rsi), %eax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_add_offset:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: # kill: def $esi killed $esi def $rsi
-; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi
-; ZNVER1-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %3 = add i32 %0, 16
- %4 = add i32 %3, %1
- ret i32 %4
-}
-
-define i32 @test_lea_add_offset_big(i32, i32) {
-; GENERIC-LABEL: test_lea_add_offset_big:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: # kill: def $esi killed $esi def $rsi
-; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi
-; GENERIC-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; GENERIC-NEXT: addl $-4096, %eax # imm = 0xF000
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_add_offset_big:
-; ATOM: # %bb.0:
-; ATOM-NEXT: # kill: def $esi killed $esi def $rsi
-; ATOM-NEXT: # kill: def $edi killed $edi def $rdi
-; ATOM-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_add_offset_big:
-; SLM: # %bb.0:
-; SLM-NEXT: # kill: def $esi killed $esi def $rsi
-; SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_add_offset_big:
-; SANDY: # %bb.0:
-; SANDY-NEXT: # kill: def $esi killed $esi def $rsi
-; SANDY-NEXT: # kill: def $edi killed $edi def $rdi
-; SANDY-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; SANDY-NEXT: addl $-4096, %eax # imm = 0xF000
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_add_offset_big:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: # kill: def $esi killed $esi def $rsi
-; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; HASWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; HASWELL-NEXT: addl $-4096, %eax # imm = 0xF000
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_add_offset_big:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: # kill: def $esi killed $esi def $rsi
-; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; BROADWELL-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; BROADWELL-NEXT: addl $-4096, %eax # imm = 0xF000
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_add_offset_big:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: # kill: def $esi killed $esi def $rsi
-; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi
-; SKYLAKE-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: addl $-4096, %eax # imm = 0xF000
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_add_offset_big:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
-; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BDVER2-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_add_offset_big:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
-; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BTVER2-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_add_offset_big:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: # kill: def $esi killed $esi def $rsi
-; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi
-; ZNVER1-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %3 = add i32 %0, -4096
- %4 = add i32 %3, %1
- ret i32 %4
-}
-
-define i32 @test_lea_mul(i32) {
-; GENERIC-LABEL: test_lea_mul:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi
-; GENERIC-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_mul:
-; ATOM: # %bb.0:
-; ATOM-NEXT: # kill: def $edi killed $edi def $rdi
-; ATOM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_mul:
-; SLM: # %bb.0:
-; SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_mul:
-; SANDY: # %bb.0:
-; SANDY-NEXT: # kill: def $edi killed $edi def $rdi
-; SANDY-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_mul:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; HASWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_mul:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; BROADWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_mul:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi
-; SKYLAKE-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_mul:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BDVER2-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_mul:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BTVER2-NEXT: leal (%rdi,%rdi,2), %eax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_mul:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi
-; ZNVER1-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %2 = mul nsw i32 %0, 3
- ret i32 %2
-}
-
-define i32 @test_lea_mul_offset(i32) {
-; GENERIC-LABEL: test_lea_mul_offset:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi
-; GENERIC-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; GENERIC-NEXT: addl $-32, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_mul_offset:
-; ATOM: # %bb.0:
-; ATOM-NEXT: # kill: def $edi killed $edi def $rdi
-; ATOM-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_mul_offset:
-; SLM: # %bb.0:
-; SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_mul_offset:
-; SANDY: # %bb.0:
-; SANDY-NEXT: # kill: def $edi killed $edi def $rdi
-; SANDY-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; SANDY-NEXT: addl $-32, %eax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_mul_offset:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; HASWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; HASWELL-NEXT: addl $-32, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_mul_offset:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; BROADWELL-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; BROADWELL-NEXT: addl $-32, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_mul_offset:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi
-; SKYLAKE-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: addl $-32, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_mul_offset:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BDVER2-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_mul_offset:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BTVER2-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_mul_offset:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi
-; ZNVER1-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %2 = mul nsw i32 %0, 3
- %3 = add nsw i32 %2, -32
- ret i32 %3
-}
-
-define i32 @test_lea_mul_offset_big(i32) {
-; GENERIC-LABEL: test_lea_mul_offset_big:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi
-; GENERIC-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; GENERIC-NEXT: addl $10000, %eax # imm = 0x2710
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_mul_offset_big:
-; ATOM: # %bb.0:
-; ATOM-NEXT: # kill: def $edi killed $edi def $rdi
-; ATOM-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_mul_offset_big:
-; SLM: # %bb.0:
-; SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_mul_offset_big:
-; SANDY: # %bb.0:
-; SANDY-NEXT: # kill: def $edi killed $edi def $rdi
-; SANDY-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; SANDY-NEXT: addl $10000, %eax # imm = 0x2710
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_mul_offset_big:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; HASWELL-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; HASWELL-NEXT: addl $10000, %eax # imm = 0x2710
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_mul_offset_big:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; BROADWELL-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; BROADWELL-NEXT: addl $10000, %eax # imm = 0x2710
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_mul_offset_big:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi
-; SKYLAKE-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: addl $10000, %eax # imm = 0x2710
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_mul_offset_big:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BDVER2-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_mul_offset_big:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BTVER2-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_mul_offset_big:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi
-; ZNVER1-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %2 = mul nsw i32 %0, 9
- %3 = add nsw i32 %2, 10000
- ret i32 %3
-}
-
-define i32 @test_lea_add_scale(i32, i32) {
-; GENERIC-LABEL: test_lea_add_scale:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: # kill: def $esi killed $esi def $rsi
-; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi
-; GENERIC-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_add_scale:
-; ATOM: # %bb.0:
-; ATOM-NEXT: # kill: def $esi killed $esi def $rsi
-; ATOM-NEXT: # kill: def $edi killed $edi def $rdi
-; ATOM-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_add_scale:
-; SLM: # %bb.0:
-; SLM-NEXT: # kill: def $esi killed $esi def $rsi
-; SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_add_scale:
-; SANDY: # %bb.0:
-; SANDY-NEXT: # kill: def $esi killed $esi def $rsi
-; SANDY-NEXT: # kill: def $edi killed $edi def $rdi
-; SANDY-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_add_scale:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: # kill: def $esi killed $esi def $rsi
-; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; HASWELL-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_add_scale:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: # kill: def $esi killed $esi def $rsi
-; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; BROADWELL-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_add_scale:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: # kill: def $esi killed $esi def $rsi
-; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi
-; SKYLAKE-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_add_scale:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
-; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BDVER2-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_add_scale:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
-; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BTVER2-NEXT: leal (%rdi,%rsi,2), %eax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_add_scale:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: # kill: def $esi killed $esi def $rsi
-; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi
-; ZNVER1-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %3 = shl i32 %1, 1
- %4 = add nsw i32 %3, %0
- ret i32 %4
-}
-
-define i32 @test_lea_add_scale_offset(i32, i32) {
-; GENERIC-LABEL: test_lea_add_scale_offset:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: # kill: def $esi killed $esi def $rsi
-; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi
-; GENERIC-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50]
-; GENERIC-NEXT: addl $96, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_add_scale_offset:
-; ATOM: # %bb.0:
-; ATOM-NEXT: # kill: def $esi killed $esi def $rsi
-; ATOM-NEXT: # kill: def $edi killed $edi def $rdi
-; ATOM-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_add_scale_offset:
-; SLM: # %bb.0:
-; SLM-NEXT: # kill: def $esi killed $esi def $rsi
-; SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_add_scale_offset:
-; SANDY: # %bb.0:
-; SANDY-NEXT: # kill: def $esi killed $esi def $rsi
-; SANDY-NEXT: # kill: def $edi killed $edi def $rdi
-; SANDY-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50]
-; SANDY-NEXT: addl $96, %eax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_add_scale_offset:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: # kill: def $esi killed $esi def $rsi
-; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; HASWELL-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50]
-; HASWELL-NEXT: addl $96, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_add_scale_offset:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: # kill: def $esi killed $esi def $rsi
-; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; BROADWELL-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50]
-; BROADWELL-NEXT: addl $96, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_add_scale_offset:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: # kill: def $esi killed $esi def $rsi
-; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi
-; SKYLAKE-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: addl $96, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_add_scale_offset:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
-; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BDVER2-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_add_scale_offset:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
-; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BTVER2-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_add_scale_offset:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: # kill: def $esi killed $esi def $rsi
-; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi
-; ZNVER1-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %3 = shl i32 %1, 2
- %4 = add i32 %0, 96
- %5 = add i32 %4, %3
- ret i32 %5
-}
-
-define i32 @test_lea_add_scale_offset_big(i32, i32) {
-; GENERIC-LABEL: test_lea_add_scale_offset_big:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: # kill: def $esi killed $esi def $rsi
-; GENERIC-NEXT: # kill: def $edi killed $edi def $rdi
-; GENERIC-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50]
-; GENERIC-NEXT: addl $-1200, %eax # imm = 0xFB50
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_add_scale_offset_big:
-; ATOM: # %bb.0:
-; ATOM-NEXT: # kill: def $esi killed $esi def $rsi
-; ATOM-NEXT: # kill: def $edi killed $edi def $rdi
-; ATOM-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_add_scale_offset_big:
-; SLM: # %bb.0:
-; SLM-NEXT: # kill: def $esi killed $esi def $rsi
-; SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_add_scale_offset_big:
-; SANDY: # %bb.0:
-; SANDY-NEXT: # kill: def $esi killed $esi def $rsi
-; SANDY-NEXT: # kill: def $edi killed $edi def $rdi
-; SANDY-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50]
-; SANDY-NEXT: addl $-1200, %eax # imm = 0xFB50
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_add_scale_offset_big:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: # kill: def $esi killed $esi def $rsi
-; HASWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; HASWELL-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50]
-; HASWELL-NEXT: addl $-1200, %eax # imm = 0xFB50
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_add_scale_offset_big:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: # kill: def $esi killed $esi def $rsi
-; BROADWELL-NEXT: # kill: def $edi killed $edi def $rdi
-; BROADWELL-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50]
-; BROADWELL-NEXT: addl $-1200, %eax # imm = 0xFB50
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_add_scale_offset_big:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: # kill: def $esi killed $esi def $rsi
-; SKYLAKE-NEXT: # kill: def $edi killed $edi def $rdi
-; SKYLAKE-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: addl $-1200, %eax # imm = 0xFB50
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_add_scale_offset_big:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
-; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BDVER2-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_add_scale_offset_big:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
-; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
-; BTVER2-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_add_scale_offset_big:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: # kill: def $esi killed $esi def $rsi
-; ZNVER1-NEXT: # kill: def $edi killed $edi def $rdi
-; ZNVER1-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %3 = shl i32 %1, 3
- %4 = add i32 %0, -1200
- %5 = add i32 %4, %3
- ret i32 %5
-}
diff --git a/llvm/test/CodeGen/X86/lea64-schedule.ll b/llvm/test/CodeGen/X86/lea64-schedule.ll
deleted file mode 100644
index cac9d2b5062..00000000000
--- a/llvm/test/CodeGen/X86/lea64-schedule.ll
+++ /dev/null
@@ -1,728 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define i64 @test_lea_offset(i64) {
-; GENERIC-LABEL: test_lea_offset:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_offset:
-; ATOM: # %bb.0:
-; ATOM-NEXT: leaq -24(%rdi), %rax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_offset:
-; SLM: # %bb.0:
-; SLM-NEXT: leaq -24(%rdi), %rax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_offset:
-; SANDY: # %bb.0:
-; SANDY-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_offset:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_offset:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_offset:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_offset:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_offset:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_offset:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: leaq -24(%rdi), %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %2 = add nsw i64 %0, -24
- ret i64 %2
-}
-
-define i64 @test_lea_offset_big(i64) {
-; GENERIC-LABEL: test_lea_offset_big:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_offset_big:
-; ATOM: # %bb.0:
-; ATOM-NEXT: leaq 1024(%rdi), %rax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_offset_big:
-; SLM: # %bb.0:
-; SLM-NEXT: leaq 1024(%rdi), %rax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_offset_big:
-; SANDY: # %bb.0:
-; SANDY-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_offset_big:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_offset_big:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_offset_big:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_offset_big:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_offset_big:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_offset_big:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %2 = add nsw i64 %0, 1024
- ret i64 %2
-}
-
-; Function Attrs: norecurse nounwind readnone uwtable
-define i64 @test_lea_add(i64, i64) {
-; GENERIC-LABEL: test_lea_add:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_add:
-; ATOM: # %bb.0:
-; ATOM-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_add:
-; SLM: # %bb.0:
-; SLM-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_add:
-; SANDY: # %bb.0:
-; SANDY-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_add:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_add:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_add:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_add:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_add:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_add:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %3 = add nsw i64 %1, %0
- ret i64 %3
-}
-
-define i64 @test_lea_add_offset(i64, i64) {
-; GENERIC-LABEL: test_lea_add_offset:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; GENERIC-NEXT: addq $16, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_add_offset:
-; ATOM: # %bb.0:
-; ATOM-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_add_offset:
-; SLM: # %bb.0:
-; SLM-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_add_offset:
-; SANDY: # %bb.0:
-; SANDY-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; SANDY-NEXT: addq $16, %rax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_add_offset:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; HASWELL-NEXT: addq $16, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_add_offset:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; BROADWELL-NEXT: addq $16, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_add_offset:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; SKYLAKE-NEXT: addq $16, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_add_offset:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_add_offset:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_add_offset:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %3 = add i64 %0, 16
- %4 = add i64 %3, %1
- ret i64 %4
-}
-
-define i64 @test_lea_add_offset_big(i64, i64) {
-; GENERIC-LABEL: test_lea_add_offset_big:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; GENERIC-NEXT: addq $-4096, %rax # imm = 0xF000
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_add_offset_big:
-; ATOM: # %bb.0:
-; ATOM-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_add_offset_big:
-; SLM: # %bb.0:
-; SLM-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_add_offset_big:
-; SANDY: # %bb.0:
-; SANDY-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; SANDY-NEXT: addq $-4096, %rax # imm = 0xF000
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_add_offset_big:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; HASWELL-NEXT: addq $-4096, %rax # imm = 0xF000
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_add_offset_big:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; BROADWELL-NEXT: addq $-4096, %rax # imm = 0xF000
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_add_offset_big:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
-; SKYLAKE-NEXT: addq $-4096, %rax # imm = 0xF000
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_add_offset_big:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_add_offset_big:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_add_offset_big:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %3 = add i64 %0, -4096
- %4 = add i64 %3, %1
- ret i64 %4
-}
-
-define i64 @test_lea_mul(i64) {
-; GENERIC-LABEL: test_lea_mul:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_mul:
-; ATOM: # %bb.0:
-; ATOM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_mul:
-; SLM: # %bb.0:
-; SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_mul:
-; SANDY: # %bb.0:
-; SANDY-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_mul:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_mul:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_mul:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_mul:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_mul:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_mul:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %2 = mul nsw i64 %0, 3
- ret i64 %2
-}
-
-define i64 @test_lea_mul_offset(i64) {
-; GENERIC-LABEL: test_lea_mul_offset:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; GENERIC-NEXT: addq $-32, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_mul_offset:
-; ATOM: # %bb.0:
-; ATOM-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_mul_offset:
-; SLM: # %bb.0:
-; SLM-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_mul_offset:
-; SANDY: # %bb.0:
-; SANDY-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; SANDY-NEXT: addq $-32, %rax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_mul_offset:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; HASWELL-NEXT: addq $-32, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_mul_offset:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; BROADWELL-NEXT: addq $-32, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_mul_offset:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; SKYLAKE-NEXT: addq $-32, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_mul_offset:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_mul_offset:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_mul_offset:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %2 = mul nsw i64 %0, 3
- %3 = add nsw i64 %2, -32
- ret i64 %3
-}
-
-define i64 @test_lea_mul_offset_big(i64) {
-; GENERIC-LABEL: test_lea_mul_offset_big:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; GENERIC-NEXT: addq $10000, %rax # imm = 0x2710
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_mul_offset_big:
-; ATOM: # %bb.0:
-; ATOM-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_mul_offset_big:
-; SLM: # %bb.0:
-; SLM-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_mul_offset_big:
-; SANDY: # %bb.0:
-; SANDY-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; SANDY-NEXT: addq $10000, %rax # imm = 0x2710
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_mul_offset_big:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; HASWELL-NEXT: addq $10000, %rax # imm = 0x2710
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_mul_offset_big:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; BROADWELL-NEXT: addq $10000, %rax # imm = 0x2710
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_mul_offset_big:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; SKYLAKE-NEXT: addq $10000, %rax # imm = 0x2710
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_mul_offset_big:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_mul_offset_big:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_mul_offset_big:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %2 = mul nsw i64 %0, 9
- %3 = add nsw i64 %2, 10000
- ret i64 %3
-}
-
-define i64 @test_lea_add_scale(i64, i64) {
-; GENERIC-LABEL: test_lea_add_scale:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_add_scale:
-; ATOM: # %bb.0:
-; ATOM-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_add_scale:
-; SLM: # %bb.0:
-; SLM-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_add_scale:
-; SANDY: # %bb.0:
-; SANDY-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_add_scale:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_add_scale:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_add_scale:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_add_scale:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_add_scale:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_add_scale:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %3 = shl i64 %1, 1
- %4 = add nsw i64 %3, %0
- ret i64 %4
-}
-
-define i64 @test_lea_add_scale_offset(i64, i64) {
-; GENERIC-LABEL: test_lea_add_scale_offset:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50]
-; GENERIC-NEXT: addq $96, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_add_scale_offset:
-; ATOM: # %bb.0:
-; ATOM-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_add_scale_offset:
-; SLM: # %bb.0:
-; SLM-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_add_scale_offset:
-; SANDY: # %bb.0:
-; SANDY-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50]
-; SANDY-NEXT: addq $96, %rax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_add_scale_offset:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50]
-; HASWELL-NEXT: addq $96, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_add_scale_offset:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50]
-; BROADWELL-NEXT: addq $96, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_add_scale_offset:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50]
-; SKYLAKE-NEXT: addq $96, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_add_scale_offset:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_add_scale_offset:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_add_scale_offset:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %3 = shl i64 %1, 2
- %4 = add i64 %0, 96
- %5 = add i64 %4, %3
- ret i64 %5
-}
-
-define i64 @test_lea_add_scale_offset_big(i64, i64) {
-; GENERIC-LABEL: test_lea_add_scale_offset_big:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50]
-; GENERIC-NEXT: addq $-1200, %rax # imm = 0xFB50
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lea_add_scale_offset_big:
-; ATOM: # %bb.0:
-; ATOM-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lea_add_scale_offset_big:
-; SLM: # %bb.0:
-; SLM-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lea_add_scale_offset_big:
-; SANDY: # %bb.0:
-; SANDY-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50]
-; SANDY-NEXT: addq $-1200, %rax # imm = 0xFB50
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lea_add_scale_offset_big:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50]
-; HASWELL-NEXT: addq $-1200, %rax # imm = 0xFB50
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lea_add_scale_offset_big:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50]
-; BROADWELL-NEXT: addq $-1200, %rax # imm = 0xFB50
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lea_add_scale_offset_big:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50]
-; SKYLAKE-NEXT: addq $-1200, %rax # imm = 0xFB50
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lea_add_scale_offset_big:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lea_add_scale_offset_big:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lea_add_scale_offset_big:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %3 = shl i64 %1, 3
- %4 = add i64 %0, -1200
- %5 = add i64 %4, %3
- ret i64 %5
-}
diff --git a/llvm/test/CodeGen/X86/lwp-schedule.ll b/llvm/test/CodeGen/X86/lwp-schedule.ll
deleted file mode 100644
index c10282cfb8e..00000000000
--- a/llvm/test/CodeGen/X86/lwp-schedule.ll
+++ /dev/null
@@ -1,299 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=x86-64 -mattr=+lwp | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER1
-; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3
-; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4
-
-define void @test_llwpcb(i8 *%a0) nounwind {
-; GENERIC-LABEL: test_llwpcb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: llwpcb %rdi # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_llwpcb:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: llwpcb %rdi # sched: [100:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_llwpcb:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: llwpcb %rdi
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_llwpcb:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: llwpcb %rdi
-; BDVER4-NEXT: retq
- tail call void @llvm.x86.llwpcb(i8 *%a0)
- ret void
-}
-
-define i8* @test_slwpcb(i8 *%a0) nounwind {
-; GENERIC-LABEL: test_slwpcb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: slwpcb %rax # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_slwpcb:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: slwpcb %rax # sched: [100:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_slwpcb:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: slwpcb %rax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_slwpcb:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: slwpcb %rax
-; BDVER4-NEXT: retq
- %1 = tail call i8* @llvm.x86.slwpcb()
- ret i8 *%1
-}
-
-define i8 @test_lwpins32_rri(i32 %a0, i32 %a1) nounwind {
-; GENERIC-LABEL: test_lwpins32_rri:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: addl %esi, %esi # sched: [1:0.33]
-; GENERIC-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
-; GENERIC-NEXT: # sched: [100:0.33]
-; GENERIC-NEXT: setb %al # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_lwpins32_rri:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: addl %esi, %esi # sched: [1:0.50]
-; BDVER12-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
-; BDVER12-NEXT: # sched: [100:0.50]
-; BDVER12-NEXT: setb %al # sched: [1:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_lwpins32_rri:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: addl %esi, %esi
-; BDVER3-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
-; BDVER3-NEXT: setb %al
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_lwpins32_rri:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: addl %esi, %esi
-; BDVER4-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
-; BDVER4-NEXT: setb %al
-; BDVER4-NEXT: retq
- %1 = add i32 %a1, %a1
- %2 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %1, i32 2309737967)
- ret i8 %2
-}
-
-define i8 @test_lwpins32_rmi(i32 %a0, i32 *%p1) nounwind {
-; GENERIC-LABEL: test_lwpins32_rmi:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
-; GENERIC-NEXT: # sched: [100:0.33]
-; GENERIC-NEXT: setb %al # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_lwpins32_rmi:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
-; BDVER12-NEXT: # sched: [100:0.50]
-; BDVER12-NEXT: setb %al # sched: [1:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_lwpins32_rmi:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
-; BDVER3-NEXT: setb %al
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_lwpins32_rmi:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
-; BDVER4-NEXT: setb %al
-; BDVER4-NEXT: retq
- %a1 = load i32, i32 *%p1
- %1 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %a1, i32 1985229328)
- ret i8 %1
-}
-
-define i8 @test_lwpins64_rri(i64 %a0, i32 %a1) nounwind {
-; GENERIC-LABEL: test_lwpins64_rri:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
-; GENERIC-NEXT: # sched: [100:0.33]
-; GENERIC-NEXT: setb %al # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_lwpins64_rri:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
-; BDVER12-NEXT: # sched: [100:0.50]
-; BDVER12-NEXT: setb %al # sched: [1:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_lwpins64_rri:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
-; BDVER3-NEXT: setb %al
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_lwpins64_rri:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
-; BDVER4-NEXT: setb %al
-; BDVER4-NEXT: retq
- %1 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 2309737967)
- ret i8 %1
-}
-
-define i8 @test_lwpins64_rmi(i64 %a0, i32 *%p1) nounwind {
-; GENERIC-LABEL: test_lwpins64_rmi:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
-; GENERIC-NEXT: # sched: [100:0.33]
-; GENERIC-NEXT: setb %al # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_lwpins64_rmi:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
-; BDVER12-NEXT: # sched: [100:0.50]
-; BDVER12-NEXT: setb %al # sched: [1:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_lwpins64_rmi:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
-; BDVER3-NEXT: setb %al
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_lwpins64_rmi:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
-; BDVER4-NEXT: setb %al
-; BDVER4-NEXT: retq
- %a1 = load i32, i32 *%p1
- %1 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 1985229328)
- ret i8 %1
-}
-
-define void @test_lwpval32_rri(i32 %a0, i32 %a1) nounwind {
-; GENERIC-LABEL: test_lwpval32_rri:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: addl %esi, %esi # sched: [1:0.33]
-; GENERIC-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
-; GENERIC-NEXT: # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_lwpval32_rri:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: addl %esi, %esi # sched: [1:0.50]
-; BDVER12-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
-; BDVER12-NEXT: # sched: [100:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_lwpval32_rri:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: addl %esi, %esi
-; BDVER3-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_lwpval32_rri:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: addl %esi, %esi
-; BDVER4-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
-; BDVER4-NEXT: retq
- %1 = add i32 %a1, %a1
- tail call void @llvm.x86.lwpval32(i32 %a0, i32 %1, i32 4275878552)
- ret void
-}
-
-define void @test_lwpval32_rmi(i32 %a0, i32 *%p1) nounwind {
-; GENERIC-LABEL: test_lwpval32_rmi:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678
-; GENERIC-NEXT: # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_lwpval32_rmi:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678
-; BDVER12-NEXT: # sched: [100:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_lwpval32_rmi:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_lwpval32_rmi:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678
-; BDVER4-NEXT: retq
- %a1 = load i32, i32 *%p1
- tail call void @llvm.x86.lwpval32(i32 %a0, i32 %a1, i32 305419896)
- ret void
-}
-
-define void @test_lwpval64_rri(i64 %a0, i32 %a1) nounwind {
-; GENERIC-LABEL: test_lwpval64_rri:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
-; GENERIC-NEXT: # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_lwpval64_rri:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
-; BDVER12-NEXT: # sched: [100:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_lwpval64_rri:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_lwpval64_rri:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
-; BDVER4-NEXT: retq
- tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 4275878552)
- ret void
-}
-
-define void @test_lwpval64_rmi(i64 %a0, i32 *%p1) nounwind {
-; GENERIC-LABEL: test_lwpval64_rmi:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
-; GENERIC-NEXT: # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_lwpval64_rmi:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
-; BDVER12-NEXT: # sched: [100:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_lwpval64_rmi:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_lwpval64_rmi:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
-; BDVER4-NEXT: retq
- %a1 = load i32, i32 *%p1
- tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 305419896)
- ret void
-}
-
-declare void @llvm.x86.llwpcb(i8*) nounwind
-declare i8* @llvm.x86.slwpcb() nounwind
-declare i8 @llvm.x86.lwpins32(i32, i32, i32) nounwind
-declare i8 @llvm.x86.lwpins64(i64, i32, i32) nounwind
-declare void @llvm.x86.lwpval32(i32, i32, i32) nounwind
-declare void @llvm.x86.lwpval64(i64, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/X86/lzcnt-schedule.ll b/llvm/test/CodeGen/X86/lzcnt-schedule.ll
deleted file mode 100644
index d8f9416b92b..00000000000
--- a/llvm/test/CodeGen/X86/lzcnt-schedule.ll
+++ /dev/null
@@ -1,187 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define i16 @test_ctlz_i16(i16 zeroext %a0, i16 *%a1) {
-; GENERIC-LABEL: test_ctlz_i16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: lzcntw (%rsi), %cx # sched: [8:1.00]
-; GENERIC-NEXT: lzcntw %di, %ax # sched: [3:1.00]
-; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_ctlz_i16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: lzcntw (%rsi), %cx # sched: [8:1.00]
-; HASWELL-NEXT: lzcntw %di, %ax # sched: [3:1.00]
-; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ctlz_i16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: lzcntw (%rsi), %cx # sched: [8:1.00]
-; BROADWELL-NEXT: lzcntw %di, %ax # sched: [3:1.00]
-; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ctlz_i16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: lzcntw (%rsi), %cx # sched: [8:1.00]
-; SKYLAKE-NEXT: lzcntw %di, %ax # sched: [3:1.00]
-; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_ctlz_i16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: lzcntw (%rsi), %cx # sched: [6:0.50]
-; BDVER2-NEXT: lzcntw %di, %ax # sched: [2:0.50]
-; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_ctlz_i16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: lzcntw (%rsi), %cx # sched: [4:1.00]
-; BTVER2-NEXT: lzcntw %di, %ax # sched: [1:0.50]
-; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ctlz_i16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: lzcntw (%rsi), %cx # sched: [6:0.50]
-; ZNVER1-NEXT: lzcntw %di, %ax # sched: [2:0.25]
-; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: # kill: def $ax killed $ax killed $eax
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i16, i16 *%a1
- %2 = tail call i16 @llvm.ctlz.i16( i16 %1, i1 false )
- %3 = tail call i16 @llvm.ctlz.i16( i16 %a0, i1 false )
- %4 = or i16 %2, %3
- ret i16 %4
-}
-declare i16 @llvm.ctlz.i16(i16, i1)
-
-define i32 @test_ctlz_i32(i32 %a0, i32 *%a1) {
-; GENERIC-LABEL: test_ctlz_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: lzcntl (%rsi), %ecx # sched: [8:1.00]
-; GENERIC-NEXT: lzcntl %edi, %eax # sched: [3:1.00]
-; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_ctlz_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: lzcntl (%rsi), %ecx # sched: [8:1.00]
-; HASWELL-NEXT: lzcntl %edi, %eax # sched: [3:1.00]
-; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ctlz_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: lzcntl (%rsi), %ecx # sched: [8:1.00]
-; BROADWELL-NEXT: lzcntl %edi, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ctlz_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: lzcntl (%rsi), %ecx # sched: [8:1.00]
-; SKYLAKE-NEXT: lzcntl %edi, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_ctlz_i32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: lzcntl (%rsi), %ecx # sched: [6:0.50]
-; BDVER2-NEXT: lzcntl %edi, %eax # sched: [2:0.50]
-; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_ctlz_i32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: lzcntl (%rsi), %ecx # sched: [4:1.00]
-; BTVER2-NEXT: lzcntl %edi, %eax # sched: [1:0.50]
-; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ctlz_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: lzcntl (%rsi), %ecx # sched: [6:0.50]
-; ZNVER1-NEXT: lzcntl %edi, %eax # sched: [2:0.25]
-; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i32, i32 *%a1
- %2 = tail call i32 @llvm.ctlz.i32( i32 %1, i1 false )
- %3 = tail call i32 @llvm.ctlz.i32( i32 %a0, i1 false )
- %4 = or i32 %2, %3
- ret i32 %4
-}
-declare i32 @llvm.ctlz.i32(i32, i1)
-
-define i64 @test_ctlz_i64(i64 %a0, i64 *%a1) {
-; GENERIC-LABEL: test_ctlz_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: lzcntq (%rsi), %rcx # sched: [8:1.00]
-; GENERIC-NEXT: lzcntq %rdi, %rax # sched: [3:1.00]
-; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_ctlz_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: lzcntq (%rsi), %rcx # sched: [8:1.00]
-; HASWELL-NEXT: lzcntq %rdi, %rax # sched: [3:1.00]
-; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ctlz_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: lzcntq (%rsi), %rcx # sched: [8:1.00]
-; BROADWELL-NEXT: lzcntq %rdi, %rax # sched: [3:1.00]
-; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ctlz_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: lzcntq (%rsi), %rcx # sched: [8:1.00]
-; SKYLAKE-NEXT: lzcntq %rdi, %rax # sched: [3:1.00]
-; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_ctlz_i64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: lzcntq (%rsi), %rcx # sched: [6:0.50]
-; BDVER2-NEXT: lzcntq %rdi, %rax # sched: [2:0.50]
-; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_ctlz_i64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: lzcntq (%rsi), %rcx # sched: [4:1.00]
-; BTVER2-NEXT: lzcntq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ctlz_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: lzcntq (%rsi), %rcx # sched: [6:0.50]
-; ZNVER1-NEXT: lzcntq %rdi, %rax # sched: [2:0.25]
-; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a1
- %2 = tail call i64 @llvm.ctlz.i64( i64 %1, i1 false )
- %3 = tail call i64 @llvm.ctlz.i64( i64 %a0, i1 false )
- %4 = or i64 %2, %3
- ret i64 %4
-}
-declare i64 @llvm.ctlz.i64(i64, i1)
diff --git a/llvm/test/CodeGen/X86/mmx-schedule.ll b/llvm/test/CodeGen/X86/mmx-schedule.ll
deleted file mode 100644
index 9aa1de95a55..00000000000
--- a/llvm/test/CodeGen/X86/mmx-schedule.ll
+++ /dev/null
@@ -1,7559 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define i64 @test_cvtpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize {
-; GENERIC-LABEL: test_cvtpd2pi:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
-; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:0.33]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtpd2pi:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [8:4.00]
-; ATOM-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [7:3.50]
-; ATOM-NEXT: por %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtpd2pi:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [7:1.00]
-; SLM-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [4:0.50]
-; SLM-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; SLM-NEXT: movq %mm1, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cvtpd2pi:
-; SANDY: # %bb.0:
-; SANDY-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
-; SANDY-NEXT: por %mm1, %mm0 # sched: [1:0.33]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cvtpd2pi:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00]
-; HASWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtpd2pi:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [4:1.00]
-; BROADWELL-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [9:1.00]
-; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33]
-; BROADWELL-NEXT: movq %mm1, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtpd2pi:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [11:1.00]
-; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; SKYLAKE-NEXT: movq %mm1, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtpd2pi:
-; SKX: # %bb.0:
-; SKX-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [5:1.00]
-; SKX-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [11:1.00]
-; SKX-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cvtpd2pi:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [13:1.00]
-; BDVER2-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [6:1.00]
-; BDVER2-NEXT: por %mm0, %mm1 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm1, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cvtpd2pi:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [8:1.00]
-; BTVER2-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [3:1.00]
-; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm1, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cvtpd2pi:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [12:1.00]
-; ZNVER1-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [4:1.00]
-; ZNVER1-NEXT: por %mm0, %mm1 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm1, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %a0)
- %2 = load <2 x double>, <2 x double> *%a1, align 16
- %3 = call x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double> %2)
- %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3)
- %5 = bitcast x86_mmx %4 to i64
- ret i64 %5
-}
-declare x86_mmx @llvm.x86.sse.cvtpd2pi(<2 x double>) nounwind readnone
-
-define <2 x double> @test_cvtpi2pd(x86_mmx %a0, x86_mmx* %a1) optsize {
-; GENERIC-LABEL: test_cvtpi2pd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtpi2pd %mm0, %xmm1 # sched: [4:1.00]
-; GENERIC-NEXT: cvtpi2pd (%rdi), %xmm0 # sched: [10:1.00]
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtpi2pd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtpi2pd (%rdi), %xmm0 # sched: [8:4.00]
-; ATOM-NEXT: cvtpi2pd %mm0, %xmm1 # sched: [7:3.50]
-; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtpi2pd:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtpi2pd (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: cvtpi2pd %mm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cvtpi2pd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00]
-; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cvtpi2pd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [9:1.00]
-; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtpi2pd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: cvtpi2pd (%rdi), %xmm0 # sched: [9:1.00]
-; BROADWELL-NEXT: cvtpi2pd %mm0, %xmm1 # sched: [4:1.00]
-; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtpi2pd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00]
-; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtpi2pd:
-; SKX: # %bb.0:
-; SKX-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [9:0.50]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cvtpi2pd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [13:1.00]
-; BDVER2-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [6:1.00]
-; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cvtpi2pd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cvtpi2pd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %a0)
- %2 = load x86_mmx, x86_mmx *%a1, align 8
- %3 = call <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx %2)
- %4 = fadd <2 x double> %1, %3
- ret <2 x double> %4
-}
-declare <2 x double> @llvm.x86.sse.cvtpi2pd(x86_mmx) nounwind readnone
-
-define <4 x float> @test_cvtpi2ps(x86_mmx %a0, x86_mmx* %a1, <4 x float> %a2, <4 x float> %a3) optsize {
-; GENERIC-LABEL: test_cvtpi2ps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtpi2ps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [5:5.00]
-; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtpi2ps:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [7:1.00]
-; SLM-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [4:0.50]
-; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cvtpi2ps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cvtpi2ps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtpi2ps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtpi2ps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [6:2.00]
-; SKYLAKE-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtpi2ps:
-; SKX: # %bb.0:
-; SKX-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [6:2.00]
-; SKX-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cvtpi2ps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00]
-; BDVER2-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cvtpi2ps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cvtpi2ps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a2, x86_mmx %a0)
- %2 = load x86_mmx, x86_mmx *%a1, align 8
- %3 = call <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float> %a3, x86_mmx %2)
- %4 = fadd <4 x float> %1, %3
- ret <4 x float> %4
-}
-declare <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, x86_mmx) nounwind readnone
-
-define i64 @test_cvtps2pi(<4 x float> %a0, <4 x float>* %a1) optsize {
-; GENERIC-LABEL: test_cvtps2pi:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
-; GENERIC-NEXT: por %mm0, %mm1 # sched: [1:0.33]
-; GENERIC-NEXT: movq %mm1, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtps2pi:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtps2pi %xmm0, %mm0 # sched: [5:5.00]
-; ATOM-NEXT: cvtps2pi (%rdi), %mm1 # sched: [5:5.00]
-; ATOM-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm1, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtps2pi:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtps2pi (%rdi), %mm1 # sched: [7:1.00]
-; SLM-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:0.50]
-; SLM-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; SLM-NEXT: movq %mm1, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cvtps2pi:
-; SANDY: # %bb.0:
-; SANDY-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
-; SANDY-NEXT: por %mm0, %mm1 # sched: [1:0.33]
-; SANDY-NEXT: movq %mm1, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cvtps2pi:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00]
-; HASWELL-NEXT: cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
-; HASWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33]
-; HASWELL-NEXT: movq %mm1, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtps2pi:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00]
-; BROADWELL-NEXT: cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
-; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33]
-; BROADWELL-NEXT: movq %mm1, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtps2pi:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: cvtps2pi %xmm0, %mm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:0.50]
-; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; SKYLAKE-NEXT: movq %mm1, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtps2pi:
-; SKX: # %bb.0:
-; SKX-NEXT: cvtps2pi %xmm0, %mm0 # sched: [5:1.00]
-; SKX-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:0.50]
-; SKX-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cvtps2pi:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00]
-; BDVER2-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00]
-; BDVER2-NEXT: por %mm0, %mm1 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm1, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cvtps2pi:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: cvtps2pi (%rdi), %mm1 # sched: [8:1.00]
-; BTVER2-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00]
-; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm1, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cvtps2pi:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: cvtps2pi (%rdi), %mm1 # sched: [12:1.00]
-; ZNVER1-NEXT: cvtps2pi %xmm0, %mm0 # sched: [4:1.00]
-; ZNVER1-NEXT: por %mm0, %mm1 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm1, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %a0)
- %2 = load <4 x float>, <4 x float> *%a1, align 16
- %3 = call x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float> %2)
- %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3)
- %5 = bitcast x86_mmx %4 to i64
- ret i64 %5
-}
-declare x86_mmx @llvm.x86.sse.cvtps2pi(<4 x float>) nounwind readnone
-
-define i64 @test_cvttpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize {
-; GENERIC-LABEL: test_cvttpd2pi:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
-; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:0.33]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvttpd2pi:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [8:4.00]
-; ATOM-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [7:3.50]
-; ATOM-NEXT: por %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvttpd2pi:
-; SLM: # %bb.0:
-; SLM-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [7:1.00]
-; SLM-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [4:0.50]
-; SLM-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; SLM-NEXT: movq %mm1, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cvttpd2pi:
-; SANDY: # %bb.0:
-; SANDY-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
-; SANDY-NEXT: por %mm1, %mm0 # sched: [1:0.33]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cvttpd2pi:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00]
-; HASWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvttpd2pi:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [4:1.00]
-; BROADWELL-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [9:1.00]
-; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33]
-; BROADWELL-NEXT: movq %mm1, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvttpd2pi:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [11:1.00]
-; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; SKYLAKE-NEXT: movq %mm1, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvttpd2pi:
-; SKX: # %bb.0:
-; SKX-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [5:1.00]
-; SKX-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [11:1.00]
-; SKX-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cvttpd2pi:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [13:1.00]
-; BDVER2-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [6:1.00]
-; BDVER2-NEXT: por %mm0, %mm1 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm1, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cvttpd2pi:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [8:1.00]
-; BTVER2-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [3:1.00]
-; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm1, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cvttpd2pi:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [12:1.00]
-; ZNVER1-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [4:1.00]
-; ZNVER1-NEXT: por %mm0, %mm1 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm1, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %a0)
- %2 = load <2 x double>, <2 x double> *%a1, align 16
- %3 = call x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double> %2)
- %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3)
- %5 = bitcast x86_mmx %4 to i64
- ret i64 %5
-}
-declare x86_mmx @llvm.x86.sse.cvttpd2pi(<2 x double>) nounwind readnone
-
-define i64 @test_cvttps2pi(<4 x float> %a0, <4 x float>* %a1) optsize {
-; GENERIC-LABEL: test_cvttps2pi:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
-; GENERIC-NEXT: por %mm0, %mm1 # sched: [1:0.33]
-; GENERIC-NEXT: movq %mm1, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvttps2pi:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvttps2pi %xmm0, %mm0 # sched: [5:5.00]
-; ATOM-NEXT: cvttps2pi (%rdi), %mm1 # sched: [5:5.00]
-; ATOM-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm1, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvttps2pi:
-; SLM: # %bb.0:
-; SLM-NEXT: cvttps2pi (%rdi), %mm1 # sched: [7:1.00]
-; SLM-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:0.50]
-; SLM-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; SLM-NEXT: movq %mm1, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cvttps2pi:
-; SANDY: # %bb.0:
-; SANDY-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
-; SANDY-NEXT: por %mm0, %mm1 # sched: [1:0.33]
-; SANDY-NEXT: movq %mm1, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cvttps2pi:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00]
-; HASWELL-NEXT: cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
-; HASWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33]
-; HASWELL-NEXT: movq %mm1, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvttps2pi:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00]
-; BROADWELL-NEXT: cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
-; BROADWELL-NEXT: por %mm0, %mm1 # sched: [1:0.33]
-; BROADWELL-NEXT: movq %mm1, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvttps2pi:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: cvttps2pi %xmm0, %mm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:0.50]
-; SKYLAKE-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; SKYLAKE-NEXT: movq %mm1, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvttps2pi:
-; SKX: # %bb.0:
-; SKX-NEXT: cvttps2pi %xmm0, %mm0 # sched: [5:1.00]
-; SKX-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:0.50]
-; SKX-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; SKX-NEXT: movq %mm1, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cvttps2pi:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00]
-; BDVER2-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00]
-; BDVER2-NEXT: por %mm0, %mm1 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm1, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cvttps2pi:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: cvttps2pi (%rdi), %mm1 # sched: [8:1.00]
-; BTVER2-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00]
-; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm1, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cvttps2pi:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: cvttps2pi (%rdi), %mm1 # sched: [12:1.00]
-; ZNVER1-NEXT: cvttps2pi %xmm0, %mm0 # sched: [4:1.00]
-; ZNVER1-NEXT: por %mm0, %mm1 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm1, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %a0)
- %2 = load <4 x float>, <4 x float> *%a1, align 16
- %3 = call x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float> %2)
- %4 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %3)
- %5 = bitcast x86_mmx %4 to i64
- ret i64 %5
-}
-declare x86_mmx @llvm.x86.sse.cvttps2pi(<4 x float>) nounwind readnone
-
-define void @test_emms() optsize {
-; GENERIC-LABEL: test_emms:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: emms # sched: [31:10.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_emms:
-; ATOM: # %bb.0:
-; ATOM-NEXT: emms # sched: [5:2.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_emms:
-; SLM: # %bb.0:
-; SLM-NEXT: emms # sched: [10:5.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_emms:
-; SANDY: # %bb.0:
-; SANDY-NEXT: emms # sched: [31:10.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_emms:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: emms # sched: [31:10.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_emms:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: emms # sched: [31:10.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_emms:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: emms # sched: [10:4.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_emms:
-; SKX: # %bb.0:
-; SKX-NEXT: emms # sched: [10:4.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_emms:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: emms # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_emms:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: emms # sched: [2:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_emms:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: emms # sched: [2:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void @llvm.x86.mmx.emms()
- ret void
-}
-declare void @llvm.x86.mmx.emms()
-
-define void @test_maskmovq(x86_mmx %a0, x86_mmx %a1, i8* %a2) optsize {
-; GENERIC-LABEL: test_maskmovq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_maskmovq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_maskmovq:
-; SLM: # %bb.0:
-; SLM-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_maskmovq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_maskmovq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_maskmovq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_maskmovq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_maskmovq:
-; SKX: # %bb.0:
-; SKX-NEXT: maskmovq %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_maskmovq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: maskmovq %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_maskmovq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: maskmovq %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_maskmovq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: maskmovq %mm1, %mm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void @llvm.x86.mmx.maskmovq(x86_mmx %a0, x86_mmx %a1, i8* %a2)
- ret void
-}
-declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind
-
-define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: test_movd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movd %edi, %mm1 # sched: [1:1.00]
-; GENERIC-NEXT: movd (%rsi), %mm2 # sched: [5:0.50]
-; GENERIC-NEXT: paddd %mm1, %mm2 # sched: [3:1.00]
-; GENERIC-NEXT: paddd %mm2, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: movd %mm2, %ecx # sched: [2:1.00]
-; GENERIC-NEXT: movd %mm0, %eax # sched: [2:1.00]
-; GENERIC-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movd %edi, %mm1 # sched: [1:1.00]
-; ATOM-NEXT: movd (%rsi), %mm2 # sched: [1:1.00]
-; ATOM-NEXT: paddd %mm1, %mm2 # sched: [1:0.50]
-; ATOM-NEXT: paddd %mm2, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movd %mm2, %ecx # sched: [3:3.00]
-; ATOM-NEXT: movd %mm0, %eax # sched: [3:3.00]
-; ATOM-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movd:
-; SLM: # %bb.0:
-; SLM-NEXT: movd (%rsi), %mm2 # sched: [3:1.00]
-; SLM-NEXT: movd %edi, %mm1 # sched: [1:0.50]
-; SLM-NEXT: paddd %mm1, %mm2 # sched: [1:0.50]
-; SLM-NEXT: paddd %mm2, %mm0 # sched: [1:0.50]
-; SLM-NEXT: movd %mm2, %ecx # sched: [1:0.50]
-; SLM-NEXT: movd %mm0, %eax # sched: [1:0.50]
-; SLM-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_movd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movd %edi, %mm1 # sched: [1:1.00]
-; SANDY-NEXT: movd (%rsi), %mm2 # sched: [5:0.50]
-; SANDY-NEXT: paddd %mm1, %mm2 # sched: [3:1.00]
-; SANDY-NEXT: paddd %mm2, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: movd %mm2, %ecx # sched: [2:1.00]
-; SANDY-NEXT: movd %mm0, %eax # sched: [2:1.00]
-; SANDY-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movd %edi, %mm1 # sched: [1:1.00]
-; HASWELL-NEXT: movd (%rsi), %mm2 # sched: [5:0.50]
-; HASWELL-NEXT: paddd %mm1, %mm2 # sched: [1:0.50]
-; HASWELL-NEXT: paddd %mm2, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: movd %mm2, %ecx # sched: [1:1.00]
-; HASWELL-NEXT: movd %mm0, %eax # sched: [1:1.00]
-; HASWELL-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movd %edi, %mm1 # sched: [1:1.00]
-; BROADWELL-NEXT: movd (%rsi), %mm2 # sched: [5:0.50]
-; BROADWELL-NEXT: paddd %mm1, %mm2 # sched: [1:0.50]
-; BROADWELL-NEXT: paddd %mm2, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: movd %mm2, %ecx # sched: [1:1.00]
-; BROADWELL-NEXT: movd %mm0, %eax # sched: [1:1.00]
-; BROADWELL-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movd %edi, %mm1 # sched: [1:1.00]
-; SKYLAKE-NEXT: movd (%rsi), %mm2 # sched: [5:0.50]
-; SKYLAKE-NEXT: paddd %mm1, %mm2 # sched: [1:0.50]
-; SKYLAKE-NEXT: paddd %mm2, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: movd %mm2, %ecx # sched: [2:1.00]
-; SKYLAKE-NEXT: movd %mm0, %eax # sched: [2:1.00]
-; SKYLAKE-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movd:
-; SKX: # %bb.0:
-; SKX-NEXT: movd %edi, %mm1 # sched: [1:1.00]
-; SKX-NEXT: movd (%rsi), %mm2 # sched: [5:0.50]
-; SKX-NEXT: paddd %mm1, %mm2 # sched: [1:0.50]
-; SKX-NEXT: paddd %mm2, %mm0 # sched: [1:0.50]
-; SKX-NEXT: movd %mm2, %ecx # sched: [2:1.00]
-; SKX-NEXT: movd %mm0, %eax # sched: [2:1.00]
-; SKX-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movd %edi, %mm1 # sched: [10:0.50]
-; BDVER2-NEXT: movd (%rsi), %mm2 # sched: [5:0.50]
-; BDVER2-NEXT: paddd %mm1, %mm2 # sched: [2:0.50]
-; BDVER2-NEXT: paddd %mm2, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: movd %mm2, %ecx # sched: [10:1.00]
-; BDVER2-NEXT: movd %mm0, %eax # sched: [10:1.00]
-; BDVER2-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movd %edi, %mm1 # sched: [8:0.50]
-; BTVER2-NEXT: movd (%rsi), %mm2 # sched: [5:1.00]
-; BTVER2-NEXT: paddd %mm1, %mm2 # sched: [1:0.50]
-; BTVER2-NEXT: paddd %mm2, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: movd %mm2, %ecx # sched: [4:1.00]
-; BTVER2-NEXT: movd %mm0, %eax # sched: [4:1.00]
-; BTVER2-NEXT: movl %ecx, (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movd (%rsi), %mm2 # sched: [8:0.50]
-; ZNVER1-NEXT: movd %edi, %mm1 # sched: [3:1.00]
-; ZNVER1-NEXT: paddd %mm1, %mm2 # sched: [1:0.25]
-; ZNVER1-NEXT: paddd %mm2, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: movd %mm2, %ecx # sched: [2:1.00]
-; ZNVER1-NEXT: movd %mm0, %eax # sched: [2:1.00]
-; ZNVER1-NEXT: movl %ecx, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = insertelement <2 x i32> undef, i32 %a1, i32 0
- %2 = bitcast <2 x i32> %1 to x86_mmx
- %3 = load i32, i32 *%a2
- %4 = insertelement <2 x i32> undef, i32 %3, i32 0
- %5 = bitcast <2 x i32> %4 to x86_mmx
- %6 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %5)
- %7 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a0, x86_mmx %6)
- %8 = bitcast x86_mmx %6 to <2 x i32>
- %9 = bitcast x86_mmx %7 to <2 x i32>
- %10 = extractelement <2 x i32> %8, i32 0
- %11 = extractelement <2 x i32> %9, i32 0
- store i32 %10, i32* %a2
- ret i32 %11
-}
-
-define i64 @test_movdq2q(<2 x i64> %a0) optsize {
-; GENERIC-LABEL: test_movdq2q:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00]
-; GENERIC-NEXT: paddd %mm0, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movdq2q:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movdq2q:
-; SLM: # %bb.0:
-; SLM-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.50]
-; SLM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_movdq2q:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00]
-; SANDY-NEXT: paddd %mm0, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movdq2q:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movdq2q %xmm0, %mm0 # sched: [2:0.67]
-; HASWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movdq2q:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movdq2q %xmm0, %mm0 # sched: [2:0.67]
-; BROADWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movdq2q:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: paddd %mm0, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movdq2q:
-; SKX: # %bb.0:
-; SKX-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00]
-; SKX-NEXT: paddd %mm0, %mm0 # sched: [1:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movdq2q:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movdq2q %xmm0, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: paddd %mm0, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movdq2q:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: paddd %mm0, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movdq2q:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: paddd %mm0, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = extractelement <2 x i64> %a0, i32 0
- %2 = bitcast i64 %1 to x86_mmx
- %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-
-define void @test_movntq(x86_mmx* %a0, x86_mmx %a1) optsize {
-; GENERIC-LABEL: test_movntq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movntq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movntq:
-; SLM: # %bb.0:
-; SLM-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_movntq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movntq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movntq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movntq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movntq:
-; SKX: # %bb.0:
-; SKX-NEXT: movntq %mm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movntq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movntq %mm0, (%rdi) # sched: [2:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movntq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movntq %mm0, (%rdi) # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movntq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movntq %mm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void @llvm.x86.mmx.movnt.dq(x86_mmx* %a0, x86_mmx %a1)
- ret void
-}
-declare void @llvm.x86.mmx.movnt.dq(x86_mmx*, x86_mmx) nounwind
-
-define void @test_movq(i64 *%a0) {
-; GENERIC-LABEL: test_movq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movq (%rdi), %mm0 # sched: [5:0.50]
-; GENERIC-NEXT: paddd %mm0, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: movq %mm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movq (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm0, (%rdi) # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movq:
-; SLM: # %bb.0:
-; SLM-NEXT: movq (%rdi), %mm0 # sched: [3:1.00]
-; SLM-NEXT: paddd %mm0, %mm0 # sched: [1:0.50]
-; SLM-NEXT: movq %mm0, (%rdi) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_movq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movq (%rdi), %mm0 # sched: [5:0.50]
-; SANDY-NEXT: paddd %mm0, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: movq %mm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movq (%rdi), %mm0 # sched: [5:0.50]
-; HASWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: movq %mm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movq (%rdi), %mm0 # sched: [5:0.50]
-; BROADWELL-NEXT: paddd %mm0, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: movq %mm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movq (%rdi), %mm0 # sched: [5:0.50]
-; SKYLAKE-NEXT: paddd %mm0, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: movq %mm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movq:
-; SKX: # %bb.0:
-; SKX-NEXT: movq (%rdi), %mm0 # sched: [5:0.50]
-; SKX-NEXT: paddd %mm0, %mm0 # sched: [1:0.50]
-; SKX-NEXT: movq %mm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movq (%rdi), %mm0 # sched: [5:0.50]
-; BDVER2-NEXT: paddd %mm0, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm0, (%rdi) # sched: [2:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movq (%rdi), %mm0 # sched: [5:1.00]
-; BTVER2-NEXT: paddd %mm0, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm0, (%rdi) # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movq (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: paddd %mm0, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64* %a0, align 8
- %2 = bitcast i64 %1 to x86_mmx
- %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %2, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- store i64 %4, i64* %a0, align 8
- ret void
-}
-
-define <2 x i64> @test_movq2dq(x86_mmx %a0) optsize {
-; GENERIC-LABEL: test_movq2dq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movq2dq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movq2dq:
-; SLM: # %bb.0:
-; SLM-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_movq2dq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movq2dq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movq2dq %mm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movq2dq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movq2dq %mm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movq2dq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movq2dq %mm0, %xmm0 # sched: [2:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movq2dq:
-; SKX: # %bb.0:
-; SKX-NEXT: movq2dq %mm0, %xmm0 # sched: [2:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movq2dq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movq2dq %mm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movq2dq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movq2dq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movq2dq %mm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast x86_mmx %a0 to i64
- %2 = insertelement <2 x i64> undef, i64 %1, i32 0
- ret <2 x i64> %2
-}
-
-define i64 @test_pabsb(x86_mmx *%a0) optsize {
-; GENERIC-LABEL: test_pabsb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pabsb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pabsb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pabsb:
-; SLM: # %bb.0:
-; SLM-NEXT: pabsb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pabsb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pabsb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pabsb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pabsb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pabsb:
-; SKX: # %bb.0:
-; SKX-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pabsb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pabsb (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: pabsb %mm0, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pabsb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pabsb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pabsb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pabsb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: pabsb %mm0, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load x86_mmx, x86_mmx *%a0, align 8
- %2 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %1)
- %3 = call x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.ssse3.pabs.b(x86_mmx) nounwind readnone
-
-define i64 @test_pabsd(x86_mmx *%a0) optsize {
-; GENERIC-LABEL: test_pabsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pabsd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pabsd (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pabsd:
-; SLM: # %bb.0:
-; SLM-NEXT: pabsd (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pabsd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pabsd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pabsd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pabsd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pabsd:
-; SKX: # %bb.0:
-; SKX-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pabsd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pabsd (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: pabsd %mm0, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pabsd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pabsd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pabsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pabsd (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: pabsd %mm0, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load x86_mmx, x86_mmx *%a0, align 8
- %2 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %1)
- %3 = call x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.ssse3.pabs.d(x86_mmx) nounwind readnone
-
-define i64 @test_pabsw(x86_mmx *%a0) optsize {
-; GENERIC-LABEL: test_pabsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pabsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pabsw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pabsw:
-; SLM: # %bb.0:
-; SLM-NEXT: pabsw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pabsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pabsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pabsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pabsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pabsw:
-; SKX: # %bb.0:
-; SKX-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pabsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pabsw (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: pabsw %mm0, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pabsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pabsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pabsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pabsw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: pabsw %mm0, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load x86_mmx, x86_mmx *%a0, align 8
- %2 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %1)
- %3 = call x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.ssse3.pabs.w(x86_mmx) nounwind readnone
-
-define i64 @test_packssdw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_packssdw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_packssdw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: packssdw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_packssdw:
-; SLM: # %bb.0:
-; SLM-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00]
-; SLM-NEXT: packssdw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_packssdw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_packssdw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00]
-; HASWELL-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_packssdw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00]
-; BROADWELL-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_packssdw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_packssdw:
-; SKX: # %bb.0:
-; SKX-NEXT: packssdw %mm1, %mm0 # sched: [3:2.00]
-; SKX-NEXT: packssdw (%rdi), %mm0 # sched: [7:2.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_packssdw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: packssdw %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: packssdw (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_packssdw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_packssdw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50]
-; ZNVER1-NEXT: packssdw (%rdi), %mm0 # sched: [1:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.packssdw(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.packssdw(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_packsswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_packsswb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_packsswb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: packsswb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_packsswb:
-; SLM: # %bb.0:
-; SLM-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00]
-; SLM-NEXT: packsswb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_packsswb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_packsswb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00]
-; HASWELL-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_packsswb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00]
-; BROADWELL-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_packsswb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_packsswb:
-; SKX: # %bb.0:
-; SKX-NEXT: packsswb %mm1, %mm0 # sched: [3:2.00]
-; SKX-NEXT: packsswb (%rdi), %mm0 # sched: [7:2.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_packsswb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: packsswb %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: packsswb (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_packsswb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_packsswb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50]
-; ZNVER1-NEXT: packsswb (%rdi), %mm0 # sched: [1:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.packsswb(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.packsswb(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_packuswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_packuswb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_packuswb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: packuswb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_packuswb:
-; SLM: # %bb.0:
-; SLM-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00]
-; SLM-NEXT: packuswb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_packuswb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_packuswb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00]
-; HASWELL-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_packuswb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00]
-; BROADWELL-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_packuswb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_packuswb:
-; SKX: # %bb.0:
-; SKX-NEXT: packuswb %mm1, %mm0 # sched: [3:2.00]
-; SKX-NEXT: packuswb (%rdi), %mm0 # sched: [7:2.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_packuswb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: packuswb %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: packuswb (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_packuswb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_packuswb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50]
-; ZNVER1-NEXT: packuswb (%rdi), %mm0 # sched: [1:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.packuswb(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.packuswb(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_paddb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_paddb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: paddb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddb %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: paddb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddb:
-; SLM: # %bb.0:
-; SLM-NEXT: paddb %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: paddb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_paddb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: paddb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: paddb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_paddb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: paddb %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: paddb %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: paddb %mm1, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddb:
-; SKX: # %bb.0:
-; SKX-NEXT: paddb %mm1, %mm0 # sched: [1:0.50]
-; SKX-NEXT: paddb (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_paddb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: paddb %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: paddb (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_paddb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: paddb %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: paddb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_paddb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: paddb %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: paddb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.padd.b(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_paddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_paddd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddd %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: paddd (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddd %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: paddd (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddd:
-; SLM: # %bb.0:
-; SLM-NEXT: paddd %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: paddd (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_paddd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: paddd %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: paddd (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_paddd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: paddd %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: paddd %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: paddd %mm1, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddd:
-; SKX: # %bb.0:
-; SKX-NEXT: paddd %mm1, %mm0 # sched: [1:0.50]
-; SKX-NEXT: paddd (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_paddd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: paddd %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: paddd (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_paddd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: paddd %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: paddd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_paddd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: paddd %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: paddd (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.padd.d(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.padd.d(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_paddq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_paddq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddq %mm1, %mm0 # sched: [1:0.50]
-; GENERIC-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddq %mm1, %mm0 # sched: [2:1.00]
-; ATOM-NEXT: paddq (%rdi), %mm0 # sched: [3:1.50]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddq:
-; SLM: # %bb.0:
-; SLM-NEXT: paddq %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: paddq (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_paddq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: paddq %mm1, %mm0 # sched: [1:0.50]
-; SANDY-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_paddq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: paddq %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: paddq %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: paddq %mm1, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddq:
-; SKX: # %bb.0:
-; SKX-NEXT: paddq %mm1, %mm0 # sched: [1:0.50]
-; SKX-NEXT: paddq (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_paddq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: paddq %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_paddq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: paddq %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: paddq (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_paddq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: paddq %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: paddq (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.padd.q(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.padd.q(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_paddsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_paddsb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: paddsb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddsb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: paddsb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddsb:
-; SLM: # %bb.0:
-; SLM-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: paddsb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_paddsb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: paddsb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_paddsb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: paddsb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddsb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: paddsb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddsb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: paddsb %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddsb:
-; SKX: # %bb.0:
-; SKX-NEXT: paddsb %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_paddsb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: paddsb %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: paddsb (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_paddsb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_paddsb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: paddsb %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: paddsb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.padds.b(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.padds.b(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_paddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_paddsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: paddsw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: paddsw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddsw:
-; SLM: # %bb.0:
-; SLM-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: paddsw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_paddsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: paddsw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_paddsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: paddsw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: paddsw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: paddsw %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddsw:
-; SKX: # %bb.0:
-; SKX-NEXT: paddsw %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_paddsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: paddsw %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: paddsw (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_paddsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_paddsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: paddsw %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: paddsw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.padds.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.padds.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_paddusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_paddusb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: paddusb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddusb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: paddusb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddusb:
-; SLM: # %bb.0:
-; SLM-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: paddusb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_paddusb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: paddusb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_paddusb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: paddusb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddusb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: paddusb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddusb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: paddusb %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: paddusb (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddusb:
-; SKX: # %bb.0:
-; SKX-NEXT: paddusb %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: paddusb (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_paddusb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: paddusb %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: paddusb (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_paddusb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: paddusb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_paddusb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: paddusb %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: paddusb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.paddus.b(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_paddusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_paddusw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: paddusw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddusw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: paddusw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddusw:
-; SLM: # %bb.0:
-; SLM-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: paddusw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_paddusw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: paddusw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_paddusw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: paddusw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddusw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: paddusw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddusw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: paddusw %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddusw:
-; SKX: # %bb.0:
-; SKX-NEXT: paddusw %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_paddusw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: paddusw %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: paddusw (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_paddusw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_paddusw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: paddusw %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: paddusw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.paddus.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_paddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_paddw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: paddw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddw %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: paddw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddw:
-; SLM: # %bb.0:
-; SLM-NEXT: paddw %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: paddw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_paddw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: paddw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: paddw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_paddw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: paddw %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: paddw %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: paddw %mm1, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddw:
-; SKX: # %bb.0:
-; SKX-NEXT: paddw %mm1, %mm0 # sched: [1:0.50]
-; SKX-NEXT: paddw (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_paddw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: paddw %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: paddw (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_paddw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: paddw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: paddw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_paddw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: paddw %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: paddw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.padd.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_palignr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_palignr:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50]
-; GENERIC-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_palignr:
-; ATOM: # %bb.0:
-; ATOM-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00]
-; ATOM-NEXT: palignr $1, (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_palignr:
-; SLM: # %bb.0:
-; SLM-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00]
-; SLM-NEXT: palignr $1, (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_palignr:
-; SANDY: # %bb.0:
-; SANDY-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50]
-; SANDY-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_palignr:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_palignr:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_palignr:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_palignr:
-; SKX: # %bb.0:
-; SKX-NEXT: palignr $1, %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_palignr:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: palignr $1, %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: palignr $1, (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_palignr:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_palignr:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: palignr $1, (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %a0, x86_mmx %a1, i8 1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx %1, x86_mmx %2, i8 1)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.palignr.b(x86_mmx, x86_mmx, i8) nounwind readnone
-
-define i64 @test_pand(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pand:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pand %mm1, %mm0 # sched: [1:0.33]
-; GENERIC-NEXT: pand (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pand:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pand %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: pand (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pand:
-; SLM: # %bb.0:
-; SLM-NEXT: pand %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: pand (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pand:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pand %mm1, %mm0 # sched: [1:0.33]
-; SANDY-NEXT: pand (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pand:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pand %mm1, %mm0 # sched: [1:0.33]
-; HASWELL-NEXT: pand (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pand:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pand %mm1, %mm0 # sched: [1:0.33]
-; BROADWELL-NEXT: pand (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pand:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pand %mm1, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: pand (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pand:
-; SKX: # %bb.0:
-; SKX-NEXT: pand %mm1, %mm0 # sched: [1:0.50]
-; SKX-NEXT: pand (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pand:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pand %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: pand (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pand:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pand %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: pand (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pand:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pand %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pand (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pand(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pand(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pandn(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pandn:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pandn %mm1, %mm0 # sched: [1:0.33]
-; GENERIC-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pandn:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pandn %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: pandn (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pandn:
-; SLM: # %bb.0:
-; SLM-NEXT: pandn %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: pandn (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pandn:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pandn %mm1, %mm0 # sched: [1:0.33]
-; SANDY-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pandn:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pandn %mm1, %mm0 # sched: [1:0.33]
-; HASWELL-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pandn:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pandn %mm1, %mm0 # sched: [1:0.33]
-; BROADWELL-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pandn:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pandn %mm1, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pandn:
-; SKX: # %bb.0:
-; SKX-NEXT: pandn %mm1, %mm0 # sched: [1:0.50]
-; SKX-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pandn:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pandn %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: pandn (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pandn:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pandn %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: pandn (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pandn:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pandn %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pandn (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pandn(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pandn(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pavgb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pavgb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pavgb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: pavgb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pavgb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: pavgb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pavgb:
-; SLM: # %bb.0:
-; SLM-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: pavgb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pavgb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pavgb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: pavgb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pavgb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: pavgb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pavgb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: pavgb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pavgb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pavgb %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: pavgb (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pavgb:
-; SKX: # %bb.0:
-; SKX-NEXT: pavgb %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: pavgb (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pavgb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pavgb %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: pavgb (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pavgb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: pavgb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pavgb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pavgb %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pavgb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pavg.b(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pavgw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pavgw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pavgw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: pavgw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pavgw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: pavgw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pavgw:
-; SLM: # %bb.0:
-; SLM-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: pavgw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pavgw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pavgw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: pavgw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pavgw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: pavgw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pavgw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: pavgw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pavgw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pavgw %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: pavgw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pavgw:
-; SKX: # %bb.0:
-; SKX-NEXT: pavgw %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: pavgw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pavgw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pavgw %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: pavgw (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pavgw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: pavgw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pavgw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pavgw %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pavgw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pavg.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pcmpeqb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pcmpeqb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpeqb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: pcmpeqb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpeqb:
-; SLM: # %bb.0:
-; SLM-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: pcmpeqb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pcmpeqb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pcmpeqb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpeqb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpeqb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpeqb:
-; SKX: # %bb.0:
-; SKX-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pcmpeqb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pcmpeqb %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: pcmpeqb (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pcmpeqb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pcmpeqb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pcmpeq.b(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pcmpeqd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pcmpeqd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpeqd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: pcmpeqd (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpeqd:
-; SLM: # %bb.0:
-; SLM-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: pcmpeqd (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pcmpeqd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pcmpeqd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpeqd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpeqd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpeqd:
-; SKX: # %bb.0:
-; SKX-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pcmpeqd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pcmpeqd %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: pcmpeqd (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pcmpeqd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pcmpeqd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pcmpeq.d(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pcmpeqw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pcmpeqw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpeqw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: pcmpeqw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpeqw:
-; SLM: # %bb.0:
-; SLM-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: pcmpeqw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pcmpeqw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pcmpeqw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpeqw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpeqw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpeqw:
-; SKX: # %bb.0:
-; SKX-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pcmpeqw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pcmpeqw %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: pcmpeqw (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pcmpeqw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pcmpeqw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pcmpeq.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pcmpgtb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pcmpgtb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpgtb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: pcmpgtb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpgtb:
-; SLM: # %bb.0:
-; SLM-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: pcmpgtb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pcmpgtb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pcmpgtb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpgtb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpgtb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpgtb:
-; SKX: # %bb.0:
-; SKX-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pcmpgtb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pcmpgtb %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: pcmpgtb (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pcmpgtb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pcmpgtb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pcmpgt.b(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pcmpgtd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pcmpgtd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpgtd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: pcmpgtd (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpgtd:
-; SLM: # %bb.0:
-; SLM-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: pcmpgtd (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pcmpgtd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pcmpgtd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpgtd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpgtd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpgtd:
-; SKX: # %bb.0:
-; SKX-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pcmpgtd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pcmpgtd %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: pcmpgtd (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pcmpgtd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pcmpgtd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pcmpgtw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pcmpgtw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpgtw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: pcmpgtw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpgtw:
-; SLM: # %bb.0:
-; SLM-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: pcmpgtw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pcmpgtw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pcmpgtw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpgtw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpgtw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpgtw:
-; SKX: # %bb.0:
-; SKX-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pcmpgtw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pcmpgtw %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: pcmpgtw (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pcmpgtw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pcmpgtw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pcmpgt.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i32 @test_pextrw(x86_mmx %a0) optsize {
-; GENERIC-LABEL: test_pextrw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pextrw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pextrw $0, %mm0, %eax # sched: [4:2.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pextrw:
-; SLM: # %bb.0:
-; SLM-NEXT: pextrw $0, %mm0, %eax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pextrw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pextrw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pextrw $0, %mm0, %eax # sched: [2:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pextrw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pextrw $0, %mm0, %eax # sched: [2:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pextrw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pextrw:
-; SKX: # %bb.0:
-; SKX-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pextrw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pextrw $0, %mm0, %eax # sched: [13:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pextrw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pextrw $0, %mm0, %eax # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pextrw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pextrw $0, %mm0, %eax # sched: [2:2.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.mmx.pextr.w(x86_mmx %a0, i32 0)
- ret i32 %1
-}
-declare i32 @llvm.x86.mmx.pextr.w(x86_mmx, i32) nounwind readnone
-
-define i64 @test_phaddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_phaddd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50]
-; GENERIC-NEXT: phaddd (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_phaddd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50]
-; ATOM-NEXT: phaddd (%rdi), %mm0 # sched: [4:2.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_phaddd:
-; SLM: # %bb.0:
-; SLM-NEXT: phaddd %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: phaddd (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_phaddd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50]
-; SANDY-NEXT: phaddd (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_phaddd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00]
-; HASWELL-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phaddd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00]
-; BROADWELL-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phaddd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phaddd:
-; SKX: # %bb.0:
-; SKX-NEXT: phaddd %mm1, %mm0 # sched: [3:2.00]
-; SKX-NEXT: phaddd (%rdi), %mm0 # sched: [8:2.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_phaddd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: phaddd %mm1, %mm0 # sched: [5:0.50]
-; BDVER2-NEXT: phaddd (%rdi), %mm0 # sched: [10:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_phaddd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: phaddd %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: phaddd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_phaddd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: phaddd %mm1, %mm0 # sched: [100:0.25]
-; ZNVER1-NEXT: phaddd (%rdi), %mm0 # sched: [100:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.ssse3.phadd.d(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_phaddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_phaddsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: phaddsw %mm1, %mm0 # sched: [3:1.50]
-; GENERIC-NEXT: phaddsw (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_phaddsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: phaddsw %mm1, %mm0 # sched: [5:2.50]
-; ATOM-NEXT: phaddsw (%rdi), %mm0 # sched: [6:3.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_phaddsw:
-; SLM: # %bb.0:
-; SLM-NEXT: phaddsw %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: phaddsw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_phaddsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: phaddsw %mm1, %mm0 # sched: [3:1.50]
-; SANDY-NEXT: phaddsw (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_phaddsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00]
-; HASWELL-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phaddsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00]
-; BROADWELL-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phaddsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phaddsw:
-; SKX: # %bb.0:
-; SKX-NEXT: phaddsw %mm1, %mm0 # sched: [3:2.00]
-; SKX-NEXT: phaddsw (%rdi), %mm0 # sched: [8:2.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_phaddsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: phaddsw %mm1, %mm0 # sched: [5:0.50]
-; BDVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [10:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_phaddsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: phaddsw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_phaddsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: phaddsw %mm1, %mm0 # sched: [100:0.25]
-; ZNVER1-NEXT: phaddsw (%rdi), %mm0 # sched: [100:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.ssse3.phadd.sw(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_phaddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_phaddw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: phaddw %mm1, %mm0 # sched: [3:1.50]
-; GENERIC-NEXT: phaddw (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_phaddw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: phaddw %mm1, %mm0 # sched: [5:2.50]
-; ATOM-NEXT: phaddw (%rdi), %mm0 # sched: [6:3.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_phaddw:
-; SLM: # %bb.0:
-; SLM-NEXT: phaddw %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: phaddw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_phaddw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: phaddw %mm1, %mm0 # sched: [3:1.50]
-; SANDY-NEXT: phaddw (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_phaddw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00]
-; HASWELL-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phaddw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00]
-; BROADWELL-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phaddw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phaddw:
-; SKX: # %bb.0:
-; SKX-NEXT: phaddw %mm1, %mm0 # sched: [3:2.00]
-; SKX-NEXT: phaddw (%rdi), %mm0 # sched: [8:2.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_phaddw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: phaddw %mm1, %mm0 # sched: [5:0.50]
-; BDVER2-NEXT: phaddw (%rdi), %mm0 # sched: [10:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_phaddw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: phaddw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: phaddw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_phaddw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: phaddw %mm1, %mm0 # sched: [100:0.25]
-; ZNVER1-NEXT: phaddw (%rdi), %mm0 # sched: [100:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_phsubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_phsubd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50]
-; GENERIC-NEXT: phsubd (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_phsubd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50]
-; ATOM-NEXT: phsubd (%rdi), %mm0 # sched: [4:2.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_phsubd:
-; SLM: # %bb.0:
-; SLM-NEXT: phsubd %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: phsubd (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_phsubd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50]
-; SANDY-NEXT: phsubd (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_phsubd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00]
-; HASWELL-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phsubd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00]
-; BROADWELL-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phsubd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phsubd:
-; SKX: # %bb.0:
-; SKX-NEXT: phsubd %mm1, %mm0 # sched: [3:2.00]
-; SKX-NEXT: phsubd (%rdi), %mm0 # sched: [8:2.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_phsubd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: phsubd %mm1, %mm0 # sched: [5:0.50]
-; BDVER2-NEXT: phsubd (%rdi), %mm0 # sched: [10:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_phsubd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: phsubd %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: phsubd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_phsubd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: phsubd %mm1, %mm0 # sched: [100:0.25]
-; ZNVER1-NEXT: phsubd (%rdi), %mm0 # sched: [100:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.ssse3.phsub.d(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_phsubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_phsubsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: phsubsw %mm1, %mm0 # sched: [3:1.50]
-; GENERIC-NEXT: phsubsw (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_phsubsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: phsubsw %mm1, %mm0 # sched: [5:2.50]
-; ATOM-NEXT: phsubsw (%rdi), %mm0 # sched: [6:3.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_phsubsw:
-; SLM: # %bb.0:
-; SLM-NEXT: phsubsw %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: phsubsw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_phsubsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: phsubsw %mm1, %mm0 # sched: [3:1.50]
-; SANDY-NEXT: phsubsw (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_phsubsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00]
-; HASWELL-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phsubsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00]
-; BROADWELL-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phsubsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phsubsw:
-; SKX: # %bb.0:
-; SKX-NEXT: phsubsw %mm1, %mm0 # sched: [3:2.00]
-; SKX-NEXT: phsubsw (%rdi), %mm0 # sched: [8:2.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_phsubsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: phsubsw %mm1, %mm0 # sched: [5:0.50]
-; BDVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [10:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_phsubsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: phsubsw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_phsubsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: phsubsw %mm1, %mm0 # sched: [100:0.25]
-; ZNVER1-NEXT: phsubsw (%rdi), %mm0 # sched: [100:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.ssse3.phsub.sw(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_phsubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_phsubw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: phsubw %mm1, %mm0 # sched: [3:1.50]
-; GENERIC-NEXT: phsubw (%rdi), %mm0 # sched: [8:1.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_phsubw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: phsubw %mm1, %mm0 # sched: [5:2.50]
-; ATOM-NEXT: phsubw (%rdi), %mm0 # sched: [6:3.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_phsubw:
-; SLM: # %bb.0:
-; SLM-NEXT: phsubw %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: phsubw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_phsubw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: phsubw %mm1, %mm0 # sched: [3:1.50]
-; SANDY-NEXT: phsubw (%rdi), %mm0 # sched: [8:1.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_phsubw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00]
-; HASWELL-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phsubw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00]
-; BROADWELL-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phsubw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phsubw:
-; SKX: # %bb.0:
-; SKX-NEXT: phsubw %mm1, %mm0 # sched: [3:2.00]
-; SKX-NEXT: phsubw (%rdi), %mm0 # sched: [8:2.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_phsubw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: phsubw %mm1, %mm0 # sched: [5:0.50]
-; BDVER2-NEXT: phsubw (%rdi), %mm0 # sched: [10:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_phsubw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: phsubw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: phsubw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_phsubw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: phsubw %mm1, %mm0 # sched: [100:0.25]
-; ZNVER1-NEXT: phsubw (%rdi), %mm0 # sched: [100:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.ssse3.phsub.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize {
-; GENERIC-LABEL: test_pinsrw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00]
-; GENERIC-NEXT: movswl (%rsi), %eax # sched: [5:0.50]
-; GENERIC-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pinsrw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movswl (%rsi), %eax # sched: [1:1.00]
-; ATOM-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pinsrw:
-; SLM: # %bb.0:
-; SLM-NEXT: movswl (%rsi), %eax # sched: [4:1.00]
-; SLM-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:1.00]
-; SLM-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pinsrw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00]
-; SANDY-NEXT: movswl (%rsi), %eax # sched: [5:0.50]
-; SANDY-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pinsrw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00]
-; HASWELL-NEXT: movswl (%rsi), %eax # sched: [5:0.50]
-; HASWELL-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pinsrw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00]
-; BROADWELL-NEXT: movswl (%rsi), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pinsrw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00]
-; SKYLAKE-NEXT: movswl (%rsi), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pinsrw:
-; SKX: # %bb.0:
-; SKX-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:2.00]
-; SKX-NEXT: movswl (%rsi), %eax # sched: [5:0.50]
-; SKX-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:2.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pinsrw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movswl (%rsi), %eax # sched: [5:0.50]
-; BDVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [12:0.50]
-; BDVER2-NEXT: pinsrw $1, %eax, %mm0 # sched: [12:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pinsrw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movswl (%rsi), %eax # sched: [4:1.00]
-; BTVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [7:0.50]
-; BTVER2-NEXT: pinsrw $1, %eax, %mm0 # sched: [7:0.50]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pinsrw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movswl (%rsi), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: pinsrw $0, %edi, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pinsrw $1, %eax, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %a0, i32 %a1, i32 0)
- %2 = load i16, i16 *%a2, align 2
- %3 = sext i16 %2 to i32
- %4 = call x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx %1, i32 %3, i32 1)
- %5 = bitcast x86_mmx %4 to i64
- ret i64 %5
-}
-declare x86_mmx @llvm.x86.mmx.pinsr.w(x86_mmx, i32, i32) nounwind readnone
-
-define i64 @test_pmaddwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pmaddwd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmaddwd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmaddwd %mm1, %mm0 # sched: [4:4.00]
-; ATOM-NEXT: pmaddwd (%rdi), %mm0 # sched: [4:4.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmaddwd:
-; SLM: # %bb.0:
-; SLM-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00]
-; SLM-NEXT: pmaddwd (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pmaddwd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00]
-; SANDY-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmaddwd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00]
-; HASWELL-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaddwd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00]
-; BROADWELL-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaddwd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: pmaddwd (%rdi), %mm0 # sched: [9:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaddwd:
-; SKX: # %bb.0:
-; SKX-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00]
-; SKX-NEXT: pmaddwd (%rdi), %mm0 # sched: [9:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pmaddwd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00]
-; BDVER2-NEXT: pmaddwd (%rdi), %mm0 # sched: [9:1.00]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pmaddwd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pmaddwd %mm1, %mm0 # sched: [2:1.00]
-; BTVER2-NEXT: pmaddwd (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pmaddwd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pmaddwd %mm1, %mm0 # sched: [4:1.00]
-; ZNVER1-NEXT: pmaddwd (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pmadd.wd(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pmaddubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pmaddubsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmaddubsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:4.00]
-; ATOM-NEXT: pmaddubsw (%rdi), %mm0 # sched: [4:4.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmaddubsw:
-; SLM: # %bb.0:
-; SLM-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00]
-; SLM-NEXT: pmaddubsw (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pmaddubsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00]
-; SANDY-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmaddubsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00]
-; HASWELL-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaddubsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00]
-; BROADWELL-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaddubsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: pmaddubsw (%rdi), %mm0 # sched: [9:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaddubsw:
-; SKX: # %bb.0:
-; SKX-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00]
-; SKX-NEXT: pmaddubsw (%rdi), %mm0 # sched: [9:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pmaddubsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00]
-; BDVER2-NEXT: pmaddubsw (%rdi), %mm0 # sched: [9:1.00]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pmaddubsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pmaddubsw %mm1, %mm0 # sched: [2:1.00]
-; BTVER2-NEXT: pmaddubsw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pmaddubsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pmaddubsw %mm1, %mm0 # sched: [4:1.00]
-; ZNVER1-NEXT: pmaddubsw (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.ssse3.pmadd.ub.sw(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pmaxsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pmaxsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmaxsw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmaxsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: pmaxsw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmaxsw:
-; SLM: # %bb.0:
-; SLM-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: pmaxsw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pmaxsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pmaxsw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmaxsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaxsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaxsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pmaxsw %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaxsw:
-; SKX: # %bb.0:
-; SKX-NEXT: pmaxsw %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pmaxsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pmaxsw %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: pmaxsw (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pmaxsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pmaxsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pmaxs.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pmaxub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pmaxub:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmaxub %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: pmaxub (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmaxub:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: pmaxub (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmaxub:
-; SLM: # %bb.0:
-; SLM-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: pmaxub (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pmaxub:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pmaxub %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: pmaxub (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmaxub:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: pmaxub (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaxub:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: pmaxub (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaxub:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pmaxub %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: pmaxub (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaxub:
-; SKX: # %bb.0:
-; SKX-NEXT: pmaxub %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: pmaxub (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pmaxub:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pmaxub %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: pmaxub (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pmaxub:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: pmaxub (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pmaxub:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pmaxub (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pmaxu.b(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pminsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pminsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pminsw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: pminsw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pminsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: pminsw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pminsw:
-; SLM: # %bb.0:
-; SLM-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: pminsw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pminsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pminsw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: pminsw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pminsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: pminsw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pminsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: pminsw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pminsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pminsw %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: pminsw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pminsw:
-; SKX: # %bb.0:
-; SKX-NEXT: pminsw %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: pminsw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pminsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pminsw %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: pminsw (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pminsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: pminsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pminsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pminsw %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pminsw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pmins.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pminub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pminub:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pminub %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: pminub (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pminub:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pminub %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: pminub (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pminub:
-; SLM: # %bb.0:
-; SLM-NEXT: pminub %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: pminub (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pminub:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pminub %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: pminub (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pminub:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pminub %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: pminub (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pminub:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pminub %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: pminub (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pminub:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pminub %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: pminub (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pminub:
-; SKX: # %bb.0:
-; SKX-NEXT: pminub %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: pminub (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pminub:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pminub %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: pminub (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pminub:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pminub %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: pminub (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pminub:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pminub %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pminub (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pminu.b(x86_mmx, x86_mmx) nounwind readnone
-
-define i32 @test_pmovmskb(x86_mmx %a0) optsize {
-; GENERIC-LABEL: test_pmovmskb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmovmskb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmovmskb %mm0, %eax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmovmskb:
-; SLM: # %bb.0:
-; SLM-NEXT: pmovmskb %mm0, %eax # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pmovmskb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmovmskb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovmskb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovmskb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pmovmskb %mm0, %eax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovmskb:
-; SKX: # %bb.0:
-; SKX-NEXT: pmovmskb %mm0, %eax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pmovmskb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pmovmskb %mm0, %eax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pmovmskb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pmovmskb %mm0, %eax # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pmovmskb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pmovmskb %mm0, %eax # sched: [1:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
- ret i32 %1
-}
-declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx) nounwind readnone
-
-define i64 @test_pmulhrsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pmulhrsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmulhrsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:4.00]
-; ATOM-NEXT: pmulhrsw (%rdi), %mm0 # sched: [4:4.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmulhrsw:
-; SLM: # %bb.0:
-; SLM-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00]
-; SLM-NEXT: pmulhrsw (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pmulhrsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00]
-; SANDY-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmulhrsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00]
-; HASWELL-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmulhrsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00]
-; BROADWELL-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmulhrsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: pmulhrsw (%rdi), %mm0 # sched: [9:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmulhrsw:
-; SKX: # %bb.0:
-; SKX-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00]
-; SKX-NEXT: pmulhrsw (%rdi), %mm0 # sched: [9:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pmulhrsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00]
-; BDVER2-NEXT: pmulhrsw (%rdi), %mm0 # sched: [9:1.00]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pmulhrsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pmulhrsw %mm1, %mm0 # sched: [2:1.00]
-; BTVER2-NEXT: pmulhrsw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pmulhrsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pmulhrsw %mm1, %mm0 # sched: [4:1.00]
-; ZNVER1-NEXT: pmulhrsw (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.ssse3.pmul.hr.sw(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pmulhw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pmulhw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmulhw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmulhw %mm1, %mm0 # sched: [4:4.00]
-; ATOM-NEXT: pmulhw (%rdi), %mm0 # sched: [4:4.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmulhw:
-; SLM: # %bb.0:
-; SLM-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00]
-; SLM-NEXT: pmulhw (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pmulhw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00]
-; SANDY-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmulhw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00]
-; HASWELL-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmulhw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00]
-; BROADWELL-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmulhw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: pmulhw (%rdi), %mm0 # sched: [9:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmulhw:
-; SKX: # %bb.0:
-; SKX-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00]
-; SKX-NEXT: pmulhw (%rdi), %mm0 # sched: [9:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pmulhw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00]
-; BDVER2-NEXT: pmulhw (%rdi), %mm0 # sched: [9:1.00]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pmulhw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pmulhw %mm1, %mm0 # sched: [2:1.00]
-; BTVER2-NEXT: pmulhw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pmulhw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pmulhw %mm1, %mm0 # sched: [4:1.00]
-; ZNVER1-NEXT: pmulhw (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pmulh.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pmulhuw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pmulhuw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmulhuw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmulhuw %mm1, %mm0 # sched: [4:4.00]
-; ATOM-NEXT: pmulhuw (%rdi), %mm0 # sched: [4:4.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmulhuw:
-; SLM: # %bb.0:
-; SLM-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00]
-; SLM-NEXT: pmulhuw (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pmulhuw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00]
-; SANDY-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmulhuw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00]
-; HASWELL-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmulhuw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00]
-; BROADWELL-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmulhuw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: pmulhuw (%rdi), %mm0 # sched: [9:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmulhuw:
-; SKX: # %bb.0:
-; SKX-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00]
-; SKX-NEXT: pmulhuw (%rdi), %mm0 # sched: [9:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pmulhuw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00]
-; BDVER2-NEXT: pmulhuw (%rdi), %mm0 # sched: [9:1.00]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pmulhuw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pmulhuw %mm1, %mm0 # sched: [2:1.00]
-; BTVER2-NEXT: pmulhuw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pmulhuw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pmulhuw %mm1, %mm0 # sched: [4:1.00]
-; ZNVER1-NEXT: pmulhuw (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pmulhu.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pmullw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pmullw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmullw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmullw %mm1, %mm0 # sched: [4:4.00]
-; ATOM-NEXT: pmullw (%rdi), %mm0 # sched: [4:4.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmullw:
-; SLM: # %bb.0:
-; SLM-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00]
-; SLM-NEXT: pmullw (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pmullw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00]
-; SANDY-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmullw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00]
-; HASWELL-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmullw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00]
-; BROADWELL-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmullw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: pmullw (%rdi), %mm0 # sched: [9:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmullw:
-; SKX: # %bb.0:
-; SKX-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00]
-; SKX-NEXT: pmullw (%rdi), %mm0 # sched: [9:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pmullw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00]
-; BDVER2-NEXT: pmullw (%rdi), %mm0 # sched: [9:1.00]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pmullw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pmullw %mm1, %mm0 # sched: [2:1.00]
-; BTVER2-NEXT: pmullw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pmullw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pmullw %mm1, %mm0 # sched: [4:1.00]
-; ZNVER1-NEXT: pmullw (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pmull.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pmuludq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pmuludq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmuludq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmuludq %mm1, %mm0 # sched: [4:4.00]
-; ATOM-NEXT: pmuludq (%rdi), %mm0 # sched: [4:4.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmuludq:
-; SLM: # %bb.0:
-; SLM-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00]
-; SLM-NEXT: pmuludq (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pmuludq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00]
-; SANDY-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pmuludq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00]
-; HASWELL-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmuludq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00]
-; BROADWELL-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmuludq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: pmuludq (%rdi), %mm0 # sched: [9:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmuludq:
-; SKX: # %bb.0:
-; SKX-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00]
-; SKX-NEXT: pmuludq (%rdi), %mm0 # sched: [9:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pmuludq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00]
-; BDVER2-NEXT: pmuludq (%rdi), %mm0 # sched: [9:1.00]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pmuludq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pmuludq %mm1, %mm0 # sched: [2:1.00]
-; BTVER2-NEXT: pmuludq (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pmuludq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pmuludq %mm1, %mm0 # sched: [4:1.00]
-; ZNVER1-NEXT: pmuludq (%rdi), %mm0 # sched: [11:1.00]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pmulu.dq(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_por(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_por:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:0.33]
-; GENERIC-NEXT: por (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_por:
-; ATOM: # %bb.0:
-; ATOM-NEXT: por %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: por (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_por:
-; SLM: # %bb.0:
-; SLM-NEXT: por %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: por (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_por:
-; SANDY: # %bb.0:
-; SANDY-NEXT: por %mm1, %mm0 # sched: [1:0.33]
-; SANDY-NEXT: por (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_por:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33]
-; HASWELL-NEXT: por (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_por:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: por %mm1, %mm0 # sched: [1:0.33]
-; BROADWELL-NEXT: por (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_por:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: por %mm1, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: por (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_por:
-; SKX: # %bb.0:
-; SKX-NEXT: por %mm1, %mm0 # sched: [1:0.50]
-; SKX-NEXT: por (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_por:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: por %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: por (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_por:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: por %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: por (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_por:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: por %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: por (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.por(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.por(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_psadbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psadbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00]
-; GENERIC-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psadbw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psadbw %mm1, %mm0 # sched: [4:2.00]
-; ATOM-NEXT: psadbw (%rdi), %mm0 # sched: [4:2.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psadbw:
-; SLM: # %bb.0:
-; SLM-NEXT: psadbw %mm1, %mm0 # sched: [4:1.00]
-; SLM-NEXT: psadbw (%rdi), %mm0 # sched: [7:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psadbw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00]
-; SANDY-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psadbw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00]
-; HASWELL-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psadbw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00]
-; BROADWELL-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psadbw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psadbw %mm1, %mm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: psadbw (%rdi), %mm0 # sched: [8:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psadbw:
-; SKX: # %bb.0:
-; SKX-NEXT: psadbw %mm1, %mm0 # sched: [3:1.00]
-; SKX-NEXT: psadbw (%rdi), %mm0 # sched: [8:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psadbw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psadbw %mm1, %mm0 # sched: [4:0.50]
-; BDVER2-NEXT: psadbw (%rdi), %mm0 # sched: [9:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psadbw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psadbw %mm1, %mm0 # sched: [2:0.50]
-; BTVER2-NEXT: psadbw (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psadbw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psadbw %mm1, %mm0 # sched: [3:1.00]
-; ZNVER1-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.psad.bw(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pshufb(x86_mmx %a0, x86_mmx %a1, x86_mmx *%a2) optsize {
-; GENERIC-LABEL: test_pshufb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pshufb %mm1, %mm0 # sched: [1:0.50]
-; GENERIC-NEXT: pshufb (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pshufb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00]
-; ATOM-NEXT: pshufb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pshufb:
-; SLM: # %bb.0:
-; SLM-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00]
-; SLM-NEXT: pshufb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pshufb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pshufb %mm1, %mm0 # sched: [1:0.50]
-; SANDY-NEXT: pshufb (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pshufb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pshufb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pshufb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pshufb:
-; SKX: # %bb.0:
-; SKX-NEXT: pshufb %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: pshufb (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pshufb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pshufb %mm1, %mm0 # sched: [3:2.00]
-; BDVER2-NEXT: pshufb (%rdi), %mm0 # sched: [8:2.00]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pshufb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pshufb %mm1, %mm0 # sched: [2:0.50]
-; BTVER2-NEXT: pshufb (%rdi), %mm0 # sched: [7:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pshufb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pshufb %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pshufb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.ssse3.pshuf.b(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pshufw(x86_mmx *%a0) optsize {
-; GENERIC-LABEL: test_pshufw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
-; GENERIC-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pshufw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [1:1.00]
-; ATOM-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pshufw:
-; SLM: # %bb.0:
-; SLM-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [4:1.00]
-; SLM-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pshufw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
-; SANDY-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pshufw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
-; HASWELL-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pshufw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
-; BROADWELL-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pshufw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
-; SKYLAKE-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pshufw:
-; SKX: # %bb.0:
-; SKX-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
-; SKX-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pshufw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [7:0.50]
-; BDVER2-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [2:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pshufw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00]
-; BTVER2-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.50]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pshufw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [8:0.50]
-; ZNVER1-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load x86_mmx, x86_mmx *%a0, align 8
- %2 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %1, i8 0)
- %3 = call x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx %2, i8 0)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.sse.pshuf.w(x86_mmx, i8) nounwind readnone
-
-define i64 @test_psignb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psignb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psignb %mm1, %mm0 # sched: [1:0.50]
-; GENERIC-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psignb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psignb %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: psignb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psignb:
-; SLM: # %bb.0:
-; SLM-NEXT: psignb %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: psignb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psignb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psignb %mm1, %mm0 # sched: [1:0.50]
-; SANDY-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psignb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psignb %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psignb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psignb %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psignb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psignb %mm1, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psignb:
-; SKX: # %bb.0:
-; SKX-NEXT: psignb %mm1, %mm0 # sched: [1:0.50]
-; SKX-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psignb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psignb %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: psignb (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psignb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psignb %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psignb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psignb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psignb %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psignb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.ssse3.psign.b(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_psignd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psignd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psignd %mm1, %mm0 # sched: [1:0.50]
-; GENERIC-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psignd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psignd %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: psignd (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psignd:
-; SLM: # %bb.0:
-; SLM-NEXT: psignd %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: psignd (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psignd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psignd %mm1, %mm0 # sched: [1:0.50]
-; SANDY-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psignd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psignd %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psignd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psignd %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psignd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psignd %mm1, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psignd:
-; SKX: # %bb.0:
-; SKX-NEXT: psignd %mm1, %mm0 # sched: [1:0.50]
-; SKX-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psignd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psignd %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: psignd (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psignd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psignd %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psignd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psignd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psignd %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psignd (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.ssse3.psign.d(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_psignw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psignw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psignw %mm1, %mm0 # sched: [1:0.50]
-; GENERIC-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psignw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psignw %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: psignw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psignw:
-; SLM: # %bb.0:
-; SLM-NEXT: psignw %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: psignw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psignw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psignw %mm1, %mm0 # sched: [1:0.50]
-; SANDY-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psignw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psignw %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psignw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psignw %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psignw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psignw %mm1, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psignw:
-; SKX: # %bb.0:
-; SKX-NEXT: psignw %mm1, %mm0 # sched: [1:0.50]
-; SKX-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psignw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psignw %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: psignw (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psignw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psignw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psignw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psignw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psignw %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psignw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.ssse3.psign.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pslld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pslld:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pslld %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00]
-; GENERIC-NEXT: pslld $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pslld:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pslld %mm1, %mm0 # sched: [2:1.00]
-; ATOM-NEXT: pslld (%rdi), %mm0 # sched: [3:1.50]
-; ATOM-NEXT: pslld $7, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pslld:
-; SLM: # %bb.0:
-; SLM-NEXT: pslld %mm1, %mm0 # sched: [1:1.00]
-; SLM-NEXT: pslld (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: pslld $7, %mm0 # sched: [1:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pslld:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pslld %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00]
-; SANDY-NEXT: pslld $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pslld:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pslld %mm1, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00]
-; HASWELL-NEXT: pslld $7, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pslld:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pslld %mm1, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00]
-; BROADWELL-NEXT: pslld $7, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pslld:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pslld %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: pslld $7, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pslld:
-; SKX: # %bb.0:
-; SKX-NEXT: pslld %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: pslld $7, %mm0 # sched: [1:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pslld:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pslld %mm1, %mm0 # sched: [3:0.50]
-; BDVER2-NEXT: pslld (%rdi), %mm0 # sched: [8:0.50]
-; BDVER2-NEXT: pslld $7, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pslld:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pslld %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: pslld $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pslld:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pslld %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pslld (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: pslld $7, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psll.d(x86_mmx %1, x86_mmx %2)
- %4 = call x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx %3, i32 7)
- %5 = bitcast x86_mmx %4 to i64
- ret i64 %5
-}
-declare x86_mmx @llvm.x86.mmx.psll.d(x86_mmx, x86_mmx) nounwind readnone
-declare x86_mmx @llvm.x86.mmx.pslli.d(x86_mmx, i32) nounwind readnone
-
-define i64 @test_psllq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psllq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psllq %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00]
-; GENERIC-NEXT: psllq $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psllq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psllq %mm1, %mm0 # sched: [2:1.00]
-; ATOM-NEXT: psllq (%rdi), %mm0 # sched: [3:1.50]
-; ATOM-NEXT: psllq $7, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psllq:
-; SLM: # %bb.0:
-; SLM-NEXT: psllq %mm1, %mm0 # sched: [1:1.00]
-; SLM-NEXT: psllq (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: psllq $7, %mm0 # sched: [1:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psllq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psllq %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00]
-; SANDY-NEXT: psllq $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psllq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psllq %mm1, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00]
-; HASWELL-NEXT: psllq $7, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psllq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psllq %mm1, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00]
-; BROADWELL-NEXT: psllq $7, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psllq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psllq %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: psllq $7, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psllq:
-; SKX: # %bb.0:
-; SKX-NEXT: psllq %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: psllq $7, %mm0 # sched: [1:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psllq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psllq %mm1, %mm0 # sched: [3:0.50]
-; BDVER2-NEXT: psllq (%rdi), %mm0 # sched: [8:0.50]
-; BDVER2-NEXT: psllq $7, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psllq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psllq %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: psllq $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psllq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psllq %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psllq (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: psllq $7, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psll.q(x86_mmx %1, x86_mmx %2)
- %4 = call x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx %3, i32 7)
- %5 = bitcast x86_mmx %4 to i64
- ret i64 %5
-}
-declare x86_mmx @llvm.x86.mmx.psll.q(x86_mmx, x86_mmx) nounwind readnone
-declare x86_mmx @llvm.x86.mmx.pslli.q(x86_mmx, i32) nounwind readnone
-
-define i64 @test_psllw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psllw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psllw %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00]
-; GENERIC-NEXT: psllw $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psllw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psllw %mm1, %mm0 # sched: [2:1.00]
-; ATOM-NEXT: psllw (%rdi), %mm0 # sched: [3:1.50]
-; ATOM-NEXT: psllw $7, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psllw:
-; SLM: # %bb.0:
-; SLM-NEXT: psllw %mm1, %mm0 # sched: [1:1.00]
-; SLM-NEXT: psllw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: psllw $7, %mm0 # sched: [1:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psllw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psllw %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00]
-; SANDY-NEXT: psllw $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psllw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psllw %mm1, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00]
-; HASWELL-NEXT: psllw $7, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psllw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psllw %mm1, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00]
-; BROADWELL-NEXT: psllw $7, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psllw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psllw %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: psllw $7, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psllw:
-; SKX: # %bb.0:
-; SKX-NEXT: psllw %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: psllw $7, %mm0 # sched: [1:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psllw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psllw %mm1, %mm0 # sched: [3:0.50]
-; BDVER2-NEXT: psllw (%rdi), %mm0 # sched: [8:0.50]
-; BDVER2-NEXT: psllw $7, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psllw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psllw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: psllw $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psllw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psllw %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psllw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: psllw $7, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psll.w(x86_mmx %1, x86_mmx %2)
- %4 = call x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx %3, i32 7)
- %5 = bitcast x86_mmx %4 to i64
- ret i64 %5
-}
-declare x86_mmx @llvm.x86.mmx.psll.w(x86_mmx, x86_mmx) nounwind readnone
-declare x86_mmx @llvm.x86.mmx.pslli.w(x86_mmx, i32) nounwind readnone
-
-define i64 @test_psrad(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psrad:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psrad %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00]
-; GENERIC-NEXT: psrad $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psrad:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psrad %mm1, %mm0 # sched: [2:1.00]
-; ATOM-NEXT: psrad (%rdi), %mm0 # sched: [3:1.50]
-; ATOM-NEXT: psrad $7, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psrad:
-; SLM: # %bb.0:
-; SLM-NEXT: psrad %mm1, %mm0 # sched: [1:1.00]
-; SLM-NEXT: psrad (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: psrad $7, %mm0 # sched: [1:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psrad:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psrad %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00]
-; SANDY-NEXT: psrad $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psrad:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psrad %mm1, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00]
-; HASWELL-NEXT: psrad $7, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrad:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psrad %mm1, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00]
-; BROADWELL-NEXT: psrad $7, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrad:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psrad %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: psrad $7, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrad:
-; SKX: # %bb.0:
-; SKX-NEXT: psrad %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: psrad $7, %mm0 # sched: [1:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psrad:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psrad %mm1, %mm0 # sched: [3:0.50]
-; BDVER2-NEXT: psrad (%rdi), %mm0 # sched: [8:0.50]
-; BDVER2-NEXT: psrad $7, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psrad:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psrad %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: psrad $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psrad:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psrad %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psrad (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: psrad $7, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psra.d(x86_mmx %1, x86_mmx %2)
- %4 = call x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx %3, i32 7)
- %5 = bitcast x86_mmx %4 to i64
- ret i64 %5
-}
-declare x86_mmx @llvm.x86.mmx.psra.d(x86_mmx, x86_mmx) nounwind readnone
-declare x86_mmx @llvm.x86.mmx.psrai.d(x86_mmx, i32) nounwind readnone
-
-define i64 @test_psraw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psraw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psraw %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00]
-; GENERIC-NEXT: psraw $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psraw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psraw %mm1, %mm0 # sched: [2:1.00]
-; ATOM-NEXT: psraw (%rdi), %mm0 # sched: [3:1.50]
-; ATOM-NEXT: psraw $7, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psraw:
-; SLM: # %bb.0:
-; SLM-NEXT: psraw %mm1, %mm0 # sched: [1:1.00]
-; SLM-NEXT: psraw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: psraw $7, %mm0 # sched: [1:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psraw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psraw %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00]
-; SANDY-NEXT: psraw $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psraw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psraw %mm1, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00]
-; HASWELL-NEXT: psraw $7, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psraw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psraw %mm1, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00]
-; BROADWELL-NEXT: psraw $7, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psraw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psraw %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: psraw $7, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psraw:
-; SKX: # %bb.0:
-; SKX-NEXT: psraw %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: psraw $7, %mm0 # sched: [1:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psraw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psraw %mm1, %mm0 # sched: [3:0.50]
-; BDVER2-NEXT: psraw (%rdi), %mm0 # sched: [8:0.50]
-; BDVER2-NEXT: psraw $7, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psraw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psraw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: psraw $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psraw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psraw %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psraw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: psraw $7, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psra.w(x86_mmx %1, x86_mmx %2)
- %4 = call x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx %3, i32 7)
- %5 = bitcast x86_mmx %4 to i64
- ret i64 %5
-}
-declare x86_mmx @llvm.x86.mmx.psra.w(x86_mmx, x86_mmx) nounwind readnone
-declare x86_mmx @llvm.x86.mmx.psrai.w(x86_mmx, i32) nounwind readnone
-
-define i64 @test_psrld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psrld:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psrld %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00]
-; GENERIC-NEXT: psrld $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psrld:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psrld %mm1, %mm0 # sched: [2:1.00]
-; ATOM-NEXT: psrld (%rdi), %mm0 # sched: [3:1.50]
-; ATOM-NEXT: psrld $7, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psrld:
-; SLM: # %bb.0:
-; SLM-NEXT: psrld %mm1, %mm0 # sched: [1:1.00]
-; SLM-NEXT: psrld (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: psrld $7, %mm0 # sched: [1:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psrld:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psrld %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00]
-; SANDY-NEXT: psrld $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psrld:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psrld %mm1, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00]
-; HASWELL-NEXT: psrld $7, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrld:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psrld %mm1, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00]
-; BROADWELL-NEXT: psrld $7, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrld:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psrld %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: psrld $7, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrld:
-; SKX: # %bb.0:
-; SKX-NEXT: psrld %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: psrld $7, %mm0 # sched: [1:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psrld:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psrld %mm1, %mm0 # sched: [3:0.50]
-; BDVER2-NEXT: psrld (%rdi), %mm0 # sched: [8:0.50]
-; BDVER2-NEXT: psrld $7, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psrld:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psrld %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: psrld $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psrld:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psrld %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psrld (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: psrld $7, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx %1, x86_mmx %2)
- %4 = call x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx %3, i32 7)
- %5 = bitcast x86_mmx %4 to i64
- ret i64 %5
-}
-declare x86_mmx @llvm.x86.mmx.psrl.d(x86_mmx, x86_mmx) nounwind readnone
-declare x86_mmx @llvm.x86.mmx.psrli.d(x86_mmx, i32) nounwind readnone
-
-define i64 @test_psrlq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psrlq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00]
-; GENERIC-NEXT: psrlq $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psrlq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psrlq %mm1, %mm0 # sched: [2:1.00]
-; ATOM-NEXT: psrlq (%rdi), %mm0 # sched: [3:1.50]
-; ATOM-NEXT: psrlq $7, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psrlq:
-; SLM: # %bb.0:
-; SLM-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00]
-; SLM-NEXT: psrlq (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: psrlq $7, %mm0 # sched: [1:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psrlq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00]
-; SANDY-NEXT: psrlq $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psrlq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00]
-; HASWELL-NEXT: psrlq $7, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrlq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00]
-; BROADWELL-NEXT: psrlq $7, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrlq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: psrlq $7, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrlq:
-; SKX: # %bb.0:
-; SKX-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: psrlq $7, %mm0 # sched: [1:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psrlq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psrlq %mm1, %mm0 # sched: [3:0.50]
-; BDVER2-NEXT: psrlq (%rdi), %mm0 # sched: [8:0.50]
-; BDVER2-NEXT: psrlq $7, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psrlq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psrlq %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: psrlq $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psrlq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psrlq %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psrlq (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: psrlq $7, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx %1, x86_mmx %2)
- %4 = call x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx %3, i32 7)
- %5 = bitcast x86_mmx %4 to i64
- ret i64 %5
-}
-declare x86_mmx @llvm.x86.mmx.psrl.q(x86_mmx, x86_mmx) nounwind readnone
-declare x86_mmx @llvm.x86.mmx.psrli.q(x86_mmx, i32) nounwind readnone
-
-define i64 @test_psrlw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psrlw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00]
-; GENERIC-NEXT: psrlw $7, %mm0 # sched: [1:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psrlw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psrlw %mm1, %mm0 # sched: [2:1.00]
-; ATOM-NEXT: psrlw (%rdi), %mm0 # sched: [3:1.50]
-; ATOM-NEXT: psrlw $7, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psrlw:
-; SLM: # %bb.0:
-; SLM-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00]
-; SLM-NEXT: psrlw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: psrlw $7, %mm0 # sched: [1:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psrlw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00]
-; SANDY-NEXT: psrlw $7, %mm0 # sched: [1:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psrlw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00]
-; HASWELL-NEXT: psrlw $7, %mm0 # sched: [1:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrlw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00]
-; BROADWELL-NEXT: psrlw $7, %mm0 # sched: [1:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrlw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: psrlw $7, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrlw:
-; SKX: # %bb.0:
-; SKX-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: psrlw $7, %mm0 # sched: [1:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psrlw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psrlw %mm1, %mm0 # sched: [3:0.50]
-; BDVER2-NEXT: psrlw (%rdi), %mm0 # sched: [8:0.50]
-; BDVER2-NEXT: psrlw $7, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psrlw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psrlw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: psrlw $7, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psrlw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psrlw %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psrlw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: psrlw $7, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx %1, x86_mmx %2)
- %4 = call x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx %3, i32 7)
- %5 = bitcast x86_mmx %4 to i64
- ret i64 %5
-}
-declare x86_mmx @llvm.x86.mmx.psrl.w(x86_mmx, x86_mmx) nounwind readnone
-declare x86_mmx @llvm.x86.mmx.psrli.w(x86_mmx, i32) nounwind readnone
-
-define i64 @test_psubb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psubb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psubb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: psubb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psubb %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: psubb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubb:
-; SLM: # %bb.0:
-; SLM-NEXT: psubb %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: psubb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psubb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psubb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: psubb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psubb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psubb %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psubb %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psubb %mm1, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubb:
-; SKX: # %bb.0:
-; SKX-NEXT: psubb %mm1, %mm0 # sched: [1:0.50]
-; SKX-NEXT: psubb (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psubb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psubb %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: psubb (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psubb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psubb %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psubb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psubb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psubb %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psubb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psub.b(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.psub.b(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_psubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psubd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psubd %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: psubd (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psubd %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: psubd (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubd:
-; SLM: # %bb.0:
-; SLM-NEXT: psubd %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: psubd (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psubd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psubd %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: psubd (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psubd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psubd %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psubd %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psubd %mm1, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubd:
-; SKX: # %bb.0:
-; SKX-NEXT: psubd %mm1, %mm0 # sched: [1:0.50]
-; SKX-NEXT: psubd (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psubd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psubd %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: psubd (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psubd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psubd %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psubd (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psubd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psubd %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psubd (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psub.d(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.psub.d(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_psubq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psubq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psubq %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: psubq (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psubq %mm1, %mm0 # sched: [2:1.00]
-; ATOM-NEXT: psubq (%rdi), %mm0 # sched: [3:1.50]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubq:
-; SLM: # %bb.0:
-; SLM-NEXT: psubq %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: psubq (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psubq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psubq %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: psubq (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psubq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psubq %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psubq %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psubq %mm1, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubq:
-; SKX: # %bb.0:
-; SKX-NEXT: psubq %mm1, %mm0 # sched: [1:0.50]
-; SKX-NEXT: psubq (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psubq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psubq %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: psubq (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psubq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psubq %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psubq (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psubq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psubq %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psubq (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psub.q(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.psub.q(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_psubsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psubsb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psubsb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: psubsb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubsb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: psubsb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubsb:
-; SLM: # %bb.0:
-; SLM-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: psubsb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psubsb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psubsb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: psubsb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psubsb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: psubsb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubsb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: psubsb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubsb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psubsb %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubsb:
-; SKX: # %bb.0:
-; SKX-NEXT: psubsb %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psubsb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psubsb %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: psubsb (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psubsb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psubsb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psubsb %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psubsb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.psubs.b(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_psubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psubsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: psubsw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: psubsw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubsw:
-; SLM: # %bb.0:
-; SLM-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: psubsw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psubsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: psubsw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psubsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: psubsw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: psubsw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psubsw %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubsw:
-; SKX: # %bb.0:
-; SKX-NEXT: psubsw %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psubsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psubsw %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: psubsw (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psubsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psubsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psubsw %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psubsw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.psubs.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_psubusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psubusb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: psubusb (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubusb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: psubusb (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubusb:
-; SLM: # %bb.0:
-; SLM-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: psubusb (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psubusb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: psubusb (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psubusb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: psubusb (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubusb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: psubusb (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubusb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psubusb %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubusb:
-; SKX: # %bb.0:
-; SKX-NEXT: psubusb %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psubusb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psubusb %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: psubusb (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psubusb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psubusb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psubusb %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psubusb (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.psubus.b(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_psubusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psubusw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psubusw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: psubusw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubusw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: psubusw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubusw:
-; SLM: # %bb.0:
-; SLM-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: psubusw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psubusw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psubusw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: psubusw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psubusw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: psubusw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubusw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: psubusw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubusw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psubusw %mm1, %mm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: psubusw (%rdi), %mm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubusw:
-; SKX: # %bb.0:
-; SKX-NEXT: psubusw %mm1, %mm0 # sched: [1:1.00]
-; SKX-NEXT: psubusw (%rdi), %mm0 # sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psubusw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psubusw %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: psubusw (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psubusw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psubusw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psubusw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psubusw %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psubusw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.psubus.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_psubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_psubw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psubw %mm1, %mm0 # sched: [3:1.00]
-; GENERIC-NEXT: psubw (%rdi), %mm0 # sched: [8:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psubw %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: psubw (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubw:
-; SLM: # %bb.0:
-; SLM-NEXT: psubw %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: psubw (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_psubw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: psubw %mm1, %mm0 # sched: [3:1.00]
-; SANDY-NEXT: psubw (%rdi), %mm0 # sched: [8:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_psubw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: psubw %mm1, %mm0 # sched: [1:0.50]
-; HASWELL-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: psubw %mm1, %mm0 # sched: [1:0.50]
-; BROADWELL-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: psubw %mm1, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubw:
-; SKX: # %bb.0:
-; SKX-NEXT: psubw %mm1, %mm0 # sched: [1:0.50]
-; SKX-NEXT: psubw (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_psubw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: psubw %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: psubw (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_psubw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: psubw %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: psubw (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_psubw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: psubw %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: psubw (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.psub.w(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.psub.w(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_punpckhbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_punpckhbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
-; GENERIC-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpckhbw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50]
-; ATOM-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpckhbw:
-; SLM: # %bb.0:
-; SLM-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
-; SLM-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_punpckhbw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
-; SANDY-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_punpckhbw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
-; HASWELL-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckhbw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
-; BROADWELL-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckhbw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
-; SKYLAKE-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckhbw:
-; SKX: # %bb.0:
-; SKX-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00]
-; SKX-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_punpckhbw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [2:0.50]
-; BDVER2-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_punpckhbw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50]
-; BTVER2-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_punpckhbw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.25]
-; ZNVER1-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.punpckhbw(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_punpckhdq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_punpckhdq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
-; GENERIC-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpckhdq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50]
-; ATOM-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpckhdq:
-; SLM: # %bb.0:
-; SLM-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
-; SLM-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_punpckhdq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
-; SANDY-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_punpckhdq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
-; HASWELL-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckhdq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
-; BROADWELL-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckhdq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
-; SKYLAKE-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckhdq:
-; SKX: # %bb.0:
-; SKX-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00]
-; SKX-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_punpckhdq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [2:0.50]
-; BDVER2-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_punpckhdq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50]
-; BTVER2-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_punpckhdq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.25]
-; ZNVER1-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_punpckhwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_punpckhwd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; GENERIC-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpckhwd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
-; ATOM-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpckhwd:
-; SLM: # %bb.0:
-; SLM-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; SLM-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_punpckhwd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; SANDY-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_punpckhwd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; HASWELL-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckhwd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; BROADWELL-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckhwd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; SKYLAKE-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckhwd:
-; SKX: # %bb.0:
-; SKX-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; SKX-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_punpckhwd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [2:0.50]
-; BDVER2-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_punpckhwd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
-; BTVER2-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_punpckhwd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.25]
-; ZNVER1-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.punpckhwd(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_punpcklbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_punpcklbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; GENERIC-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpcklbw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; ATOM-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpcklbw:
-; SLM: # %bb.0:
-; SLM-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; SLM-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_punpcklbw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; SANDY-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_punpcklbw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; HASWELL-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpcklbw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; BROADWELL-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpcklbw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; SKYLAKE-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpcklbw:
-; SKX: # %bb.0:
-; SKX-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00]
-; SKX-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_punpcklbw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [2:0.50]
-; BDVER2-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_punpcklbw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50]
-; BTVER2-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_punpcklbw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.25]
-; ZNVER1-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.punpcklbw(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_punpckldq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_punpckldq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
-; GENERIC-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpckldq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
-; ATOM-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpckldq:
-; SLM: # %bb.0:
-; SLM-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
-; SLM-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_punpckldq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
-; SANDY-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_punpckldq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
-; HASWELL-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckldq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
-; BROADWELL-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckldq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
-; SKYLAKE-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckldq:
-; SKX: # %bb.0:
-; SKX-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00]
-; SKX-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_punpckldq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [2:0.50]
-; BDVER2-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_punpckldq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.50]
-; BTVER2-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_punpckldq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.25]
-; ZNVER1-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.punpckldq(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_punpcklwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_punpcklwd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
-; GENERIC-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpcklwd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
-; ATOM-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpcklwd:
-; SLM: # %bb.0:
-; SLM-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
-; SLM-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_punpcklwd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
-; SANDY-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_punpcklwd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
-; HASWELL-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpcklwd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
-; BROADWELL-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpcklwd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
-; SKYLAKE-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpcklwd:
-; SKX: # %bb.0:
-; SKX-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00]
-; SKX-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_punpcklwd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [2:0.50]
-; BDVER2-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_punpcklwd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.50]
-; BTVER2-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_punpcklwd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.25]
-; ZNVER1-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.punpcklwd(x86_mmx, x86_mmx) nounwind readnone
-
-define i64 @test_pxor(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize {
-; GENERIC-LABEL: test_pxor:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pxor %mm1, %mm0 # sched: [1:0.33]
-; GENERIC-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
-; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pxor:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
-; ATOM-NEXT: pxor (%rdi), %mm0 # sched: [1:1.00]
-; ATOM-NEXT: movq %mm0, %rax # sched: [3:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pxor:
-; SLM: # %bb.0:
-; SLM-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
-; SLM-NEXT: pxor (%rdi), %mm0 # sched: [4:1.00]
-; SLM-NEXT: movq %mm0, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pxor:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pxor %mm1, %mm0 # sched: [1:0.33]
-; SANDY-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
-; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pxor:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pxor %mm1, %mm0 # sched: [1:0.33]
-; HASWELL-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
-; HASWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pxor:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pxor %mm1, %mm0 # sched: [1:0.33]
-; BROADWELL-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
-; BROADWELL-NEXT: movq %mm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pxor:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pxor:
-; SKX: # %bb.0:
-; SKX-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
-; SKX-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50]
-; SKX-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pxor:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pxor %mm1, %mm0 # sched: [2:0.50]
-; BDVER2-NEXT: pxor (%rdi), %mm0 # sched: [7:0.50]
-; BDVER2-NEXT: movq %mm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pxor:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pxor %mm1, %mm0 # sched: [1:0.50]
-; BTVER2-NEXT: pxor (%rdi), %mm0 # sched: [6:1.00]
-; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pxor:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pxor %mm1, %mm0 # sched: [1:0.25]
-; ZNVER1-NEXT: pxor (%rdi), %mm0 # sched: [8:0.50]
-; ZNVER1-NEXT: movq %mm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %a0, x86_mmx %a1)
- %2 = load x86_mmx, x86_mmx *%a2, align 8
- %3 = call x86_mmx @llvm.x86.mmx.pxor(x86_mmx %1, x86_mmx %2)
- %4 = bitcast x86_mmx %3 to i64
- ret i64 %4
-}
-declare x86_mmx @llvm.x86.mmx.pxor(x86_mmx, x86_mmx) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/movbe-schedule.ll b/llvm/test/CodeGen/X86/movbe-schedule.ll
deleted file mode 100644
index c0b65f36622..00000000000
--- a/llvm/test/CodeGen/X86/movbe-schedule.ll
+++ /dev/null
@@ -1,190 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+movbe | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define i16 @test_movbe_i16(i16 *%a0, i16 %a1, i16 *%a2) {
-; GENERIC-LABEL: test_movbe_i16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movbew (%rdi), %ax # sched: [6:0.50]
-; GENERIC-NEXT: movbew %si, (%rdx) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movbe_i16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movbew (%rdi), %ax # sched: [1:1.00]
-; ATOM-NEXT: movbew %si, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movbe_i16:
-; SLM: # %bb.0:
-; SLM-NEXT: movbew (%rdi), %ax # sched: [4:1.00]
-; SLM-NEXT: movbew %si, (%rdx) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; HASWELL-LABEL: test_movbe_i16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movbew (%rdi), %ax # sched: [6:0.50]
-; HASWELL-NEXT: movbew %si, (%rdx) # sched: [2:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movbe_i16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movbew (%rdi), %ax # sched: [6:0.50]
-; BROADWELL-NEXT: movbew %si, (%rdx) # sched: [2:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movbe_i16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movbew (%rdi), %ax # sched: [6:0.50]
-; SKYLAKE-NEXT: movbew %si, (%rdx) # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_movbe_i16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movbew (%rdi), %ax # sched: [4:1.00]
-; BTVER2-NEXT: movbew %si, (%rdx) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movbe_i16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movbew (%rdi), %ax # sched: [5:0.50]
-; ZNVER1-NEXT: movbew %si, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i16, i16 *%a0
- %2 = tail call i16 @llvm.bswap.i16( i16 %1 )
- %3 = tail call i16 @llvm.bswap.i16( i16 %a1 )
- store i16 %3, i16* %a2, align 2
- ret i16 %2
-}
-declare i16 @llvm.bswap.i16(i16)
-
-define i32 @test_movbe_i32(i32 *%a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: test_movbe_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movbel (%rdi), %eax # sched: [6:0.50]
-; GENERIC-NEXT: movbel %esi, (%rdx) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movbe_i32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movbel (%rdi), %eax # sched: [1:1.00]
-; ATOM-NEXT: movbel %esi, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movbe_i32:
-; SLM: # %bb.0:
-; SLM-NEXT: movbel (%rdi), %eax # sched: [4:1.00]
-; SLM-NEXT: movbel %esi, (%rdx) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; HASWELL-LABEL: test_movbe_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movbel (%rdi), %eax # sched: [6:0.50]
-; HASWELL-NEXT: movbel %esi, (%rdx) # sched: [2:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movbe_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movbel (%rdi), %eax # sched: [6:0.50]
-; BROADWELL-NEXT: movbel %esi, (%rdx) # sched: [2:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movbe_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movbel (%rdi), %eax # sched: [6:0.50]
-; SKYLAKE-NEXT: movbel %esi, (%rdx) # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_movbe_i32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movbel (%rdi), %eax # sched: [4:1.00]
-; BTVER2-NEXT: movbel %esi, (%rdx) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movbe_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movbel (%rdi), %eax # sched: [5:0.50]
-; ZNVER1-NEXT: movbel %esi, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i32, i32 *%a0
- %2 = tail call i32 @llvm.bswap.i32( i32 %1 )
- %3 = tail call i32 @llvm.bswap.i32( i32 %a1 )
- store i32 %3, i32* %a2, align 2
- ret i32 %2
-}
-declare i32 @llvm.bswap.i32(i32)
-
-define i64 @test_movbe_i64(i64 *%a0, i64 %a1, i64 *%a2) {
-; GENERIC-LABEL: test_movbe_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movbeq (%rdi), %rax # sched: [6:0.50]
-; GENERIC-NEXT: movbeq %rsi, (%rdx) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movbe_i64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movbeq (%rdi), %rax # sched: [1:1.00]
-; ATOM-NEXT: movbeq %rsi, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movbe_i64:
-; SLM: # %bb.0:
-; SLM-NEXT: movbeq (%rdi), %rax # sched: [4:1.00]
-; SLM-NEXT: movbeq %rsi, (%rdx) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; HASWELL-LABEL: test_movbe_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movbeq (%rdi), %rax # sched: [6:0.50]
-; HASWELL-NEXT: movbeq %rsi, (%rdx) # sched: [2:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movbe_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movbeq (%rdi), %rax # sched: [6:0.50]
-; BROADWELL-NEXT: movbeq %rsi, (%rdx) # sched: [2:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movbe_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movbeq (%rdi), %rax # sched: [6:0.50]
-; SKYLAKE-NEXT: movbeq %rsi, (%rdx) # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BTVER2-LABEL: test_movbe_i64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movbeq (%rdi), %rax # sched: [4:1.00]
-; BTVER2-NEXT: movbeq %rsi, (%rdx) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movbe_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movbeq (%rdi), %rax # sched: [5:0.50]
-; ZNVER1-NEXT: movbeq %rsi, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a0
- %2 = tail call i64 @llvm.bswap.i64( i64 %1 )
- %3 = tail call i64 @llvm.bswap.i64( i64 %a1 )
- store i64 %3, i64* %a2, align 2
- ret i64 %2
-}
-declare i64 @llvm.bswap.i64(i64)
diff --git a/llvm/test/CodeGen/X86/mul-constant-i32.ll b/llvm/test/CodeGen/X86/mul-constant-i32.ll
index 0ad16878b7b..3b2abf8c1f3 100644
--- a/llvm/test/CodeGen/X86/mul-constant-i32.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-i32.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW
-; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG
-; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT
-; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT
-; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT
-; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM
-; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell | FileCheck %s --check-prefixes=X64,X64-HSW
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=btver2 | FileCheck %s --check-prefixes=X64,X64-JAG
+; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -mcpu=haswell | FileCheck %s --check-prefixes=X64-NOOPT,HSW-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -mcpu=btver2 | FileCheck %s --check-prefixes=X64-NOOPT,JAG-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=slm | FileCheck %s --check-prefixes=X64,X64-SLM
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -mcpu=slm | FileCheck %s --check-prefixes=X64-NOOPT,SLM-NOOPT
define i32 @test_mul_by_1(i32 %x) {
; X86-LABEL: test_mul_by_1:
@@ -14,40 +14,20 @@ define i32 @test_mul_by_1(i32 %x) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_1:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_1:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_1:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_1:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_1:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_1:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_1:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_1:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_1:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: movl %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 1
ret i32 %mul
}
@@ -59,17 +39,11 @@ define i32 @test_mul_by_2(i32 %x) {
; X86-NEXT: addl %eax, %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_2:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_2:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_2:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (%rdi,%rdi), %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_2:
; X86-NOOPT: # %bb.0:
@@ -77,29 +51,11 @@ define i32 @test_mul_by_2(i32 %x) {
; X86-NOOPT-NEXT: addl %eax, %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_2:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; HSW-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_2:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; JAG-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_2:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: leal (%rdi,%rdi), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_2:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_2:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NOOPT-NEXT: leal (%rdi,%rdi), %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 2
ret i32 %mul
}
@@ -111,46 +67,22 @@ define i32 @test_mul_by_3(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_3:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_3:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_3:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_3:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $3, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_3:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; HSW-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_3:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; JAG-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [2:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_3:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_3:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_3:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 3
ret i32 %mul
}
@@ -162,17 +94,11 @@ define i32 @test_mul_by_4(i32 %x) {
; X86-NEXT: shll $2, %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_4:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_4:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (,%rdi,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_4:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (,%rdi,4), %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_4:
; X86-NOOPT: # %bb.0:
@@ -180,29 +106,11 @@ define i32 @test_mul_by_4(i32 %x) {
; X86-NOOPT-NEXT: shll $2, %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_4:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; HSW-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_4:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; JAG-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [2:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_4:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: leal (,%rdi,4), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_4:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_4:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NOOPT-NEXT: leal (,%rdi,4), %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 4
ret i32 %mul
}
@@ -214,46 +122,22 @@ define i32 @test_mul_by_5(i32 %x) {
; X86-NEXT: leal (%eax,%eax,4), %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_5:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_5:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_5:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_5:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $5, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_5:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_5:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; JAG-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_5:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_5:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_5:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 5
ret i32 %mul
}
@@ -266,46 +150,22 @@ define i32 @test_mul_by_6(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_6:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
-; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_6:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50]
-; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_6:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: addl %edi, %edi
+; X64-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_6:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $6, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_6:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_6:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_6:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50]
-; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_6:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_6:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $6, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 6
ret i32 %mul
}
@@ -318,46 +178,22 @@ define i32 @test_mul_by_7(i32 %x) {
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_7:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_7:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (,%rdi,8), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_7:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (,%rdi,8), %eax
+; X64-NEXT: subl %edi, %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_7:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $7, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_7:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_7:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_7:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_7:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_7:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $7, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 7
ret i32 %mul
}
@@ -369,17 +205,11 @@ define i32 @test_mul_by_8(i32 %x) {
; X86-NEXT: shll $3, %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_8:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_8:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (,%rdi,8), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_8:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (,%rdi,8), %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_8:
; X86-NOOPT: # %bb.0:
@@ -387,29 +217,11 @@ define i32 @test_mul_by_8(i32 %x) {
; X86-NOOPT-NEXT: shll $3, %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_8:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; HSW-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_8:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; JAG-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [2:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_8:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_8:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_8:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NOOPT-NEXT: leal (,%rdi,8), %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 8
ret i32 %mul
}
@@ -421,46 +233,22 @@ define i32 @test_mul_by_9(i32 %x) {
; X86-NEXT: leal (%eax,%eax,8), %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_9:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_9:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_9:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_9:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $9, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_9:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_9:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; JAG-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_9:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_9:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_9:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 9
ret i32 %mul
}
@@ -473,46 +261,22 @@ define i32 @test_mul_by_10(i32 %x) {
; X86-NEXT: leal (%eax,%eax,4), %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_10:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
-; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_10:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50]
-; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_10:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: addl %edi, %edi
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_10:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $10, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_10:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_10:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_10:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50]
-; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_10:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_10:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $10, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 10
ret i32 %mul
}
@@ -528,41 +292,31 @@ define i32 @test_mul_by_11(i32 %x) {
; X64-HSW-LABEL: test_mul_by_11:
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_11:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: leal (%rdi,%rax,2), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-JAG-NEXT: leal (%rdi,%rax,2), %eax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_11:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $11, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_11:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_11:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_11:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $11, %edi, %eax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_11:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imull $11, %edi, %eax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_11:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imull $11, %edi, %eax
+; X64-SLM-NEXT: retq
%mul = mul nsw i32 %x, 11
ret i32 %mul
}
@@ -575,46 +329,22 @@ define i32 @test_mul_by_12(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_12:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_12:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50]
-; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_12:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: shll $2, %edi
+; X64-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_12:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $12, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_12:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_12:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_12:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00]
-; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_12:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_12:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $12, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 12
ret i32 %mul
}
@@ -630,41 +360,31 @@ define i32 @test_mul_by_13(i32 %x) {
; X64-HSW-LABEL: test_mul_by_13:
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_13:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_13:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $13, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_13:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_13:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_13:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $13, %edi, %eax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_13:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imull $13, %edi, %eax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_13:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imull $13, %edi, %eax
+; X64-SLM-NEXT: retq
%mul = mul nsw i32 %x, 13
ret i32 %mul
}
@@ -679,49 +399,23 @@ define i32 @test_mul_by_14(i32 %x) {
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_14:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50]
-; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_14:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: shll $4, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_14:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shll $4, %eax
+; X64-NEXT: subl %edi, %eax
+; X64-NEXT: subl %edi, %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_14:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $14, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_14:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_14:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_14:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: shll $4, %eax # sched: [1:1.00]
-; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_14:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_14:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $14, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 14
ret i32 %mul
}
@@ -734,46 +428,22 @@ define i32 @test_mul_by_15(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_15:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_15:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_15:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: leal (%rax,%rax,2), %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_15:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $15, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_15:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_15:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_15:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: leal (%rax,%rax,2), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_15:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_15:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $15, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 15
ret i32 %mul
}
@@ -785,17 +455,11 @@ define i32 @test_mul_by_16(i32 %x) {
; X86-NEXT: shll $4, %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_16:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_16:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: shll $4, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_16:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shll $4, %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_16:
; X86-NOOPT: # %bb.0:
@@ -803,29 +467,11 @@ define i32 @test_mul_by_16(i32 %x) {
; X86-NOOPT-NEXT: shll $4, %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_16:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
-; HSW-NOOPT-NEXT: shll $4, %eax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_16:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50]
-; JAG-NOOPT-NEXT: shll $4, %eax # sched: [1:0.50]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_16:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: shll $4, %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_16:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50]
-; SLM-NOOPT-NEXT: shll $4, %eax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_16:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: movl %edi, %eax
+; X64-NOOPT-NEXT: shll $4, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 16
ret i32 %mul
}
@@ -839,49 +485,23 @@ define i32 @test_mul_by_17(i32 %x) {
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_17:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_17:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: shll $4, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_17:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shll $4, %eax
+; X64-NEXT: leal (%rax,%rdi), %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_17:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $17, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_17:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_17:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_17:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: shll $4, %eax # sched: [1:1.00]
-; X64-SLM-NEXT: leal (%rax,%rdi), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_17:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_17:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $17, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 17
ret i32 %mul
}
@@ -894,46 +514,22 @@ define i32 @test_mul_by_18(i32 %x) {
; X86-NEXT: leal (%eax,%eax,8), %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_18:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
-; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_18:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50]
-; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_18:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: addl %edi, %edi
+; X64-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_18:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $18, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_18:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_18:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_18:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50]
-; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_18:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_18:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $18, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 18
ret i32 %mul
}
@@ -949,41 +545,31 @@ define i32 @test_mul_by_19(i32 %x) {
; X64-HSW-LABEL: test_mul_by_19:
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_19:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: leal (%rdi,%rax,2), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-JAG-NEXT: leal (%rdi,%rax,2), %eax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_19:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $19, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_19:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_19:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_19:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $19, %edi, %eax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_19:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imull $19, %edi, %eax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_19:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imull $19, %edi, %eax
+; X64-SLM-NEXT: retq
%mul = mul nsw i32 %x, 19
ret i32 %mul
}
@@ -996,46 +582,22 @@ define i32 @test_mul_by_20(i32 %x) {
; X86-NEXT: leal (%eax,%eax,4), %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_20:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_20:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50]
-; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_20:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: shll $2, %edi
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_20:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $20, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_20:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_20:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_20:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00]
-; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_20:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_20:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $20, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 20
ret i32 %mul
}
@@ -1051,41 +613,31 @@ define i32 @test_mul_by_21(i32 %x) {
; X64-HSW-LABEL: test_mul_by_21:
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_21:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_21:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $21, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_21:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_21:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_21:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $21, %edi, %eax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_21:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imull $21, %edi, %eax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_21:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imull $21, %edi, %eax
+; X64-SLM-NEXT: retq
%mul = mul nsw i32 %x, 21
ret i32 %mul
}
@@ -1102,43 +654,33 @@ define i32 @test_mul_by_22(i32 %x) {
; X64-HSW-LABEL: test_mul_by_22:
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax
+; X64-HSW-NEXT: addl %edi, %eax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_22:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax
+; X64-JAG-NEXT: addl %edi, %eax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_22:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $22, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_22:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_22:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_22:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $22, %edi, %eax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_22:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imull $22, %edi, %eax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_22:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imull $22, %edi, %eax
+; X64-SLM-NEXT: retq
%mul = mul nsw i32 %x, 22
ret i32 %mul
}
@@ -1155,43 +697,33 @@ define i32 @test_mul_by_23(i32 %x) {
; X64-HSW-LABEL: test_mul_by_23:
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: shll $3, %eax # sched: [1:0.50]
-; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-HSW-NEXT: shll $3, %eax
+; X64-HSW-NEXT: subl %edi, %eax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_23:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: shll $3, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-JAG-NEXT: shll $3, %eax
+; X64-JAG-NEXT: subl %edi, %eax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_23:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $23, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_23:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_23:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_23:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $23, %edi, %eax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_23:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imull $23, %edi, %eax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_23:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imull $23, %edi, %eax
+; X64-SLM-NEXT: retq
%mul = mul nsw i32 %x, 23
ret i32 %mul
}
@@ -1204,46 +736,22 @@ define i32 @test_mul_by_24(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_24:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: shll $3, %edi # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_24:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: shll $3, %edi # sched: [1:0.50]
-; X64-JAG-NEXT: leal (%rdi,%rdi,2), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_24:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: shll $3, %edi
+; X64-NEXT: leal (%rdi,%rdi,2), %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_24:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $24, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_24:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_24:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_24:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: shll $3, %edi # sched: [1:1.00]
-; X64-SLM-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_24:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_24:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $24, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 24
ret i32 %mul
}
@@ -1256,46 +764,22 @@ define i32 @test_mul_by_25(i32 %x) {
; X86-NEXT: leal (%eax,%eax,4), %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_25:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_25:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: leal (%rax,%rax,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_25:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: leal (%rax,%rax,4), %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_25:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $25, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_25:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_25:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_25:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: leal (%rax,%rax,4), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_25:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_25:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $25, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 25
ret i32 %mul
}
@@ -1312,43 +796,33 @@ define i32 @test_mul_by_26(i32 %x) {
; X64-HSW-LABEL: test_mul_by_26:
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-HSW-NEXT: leal (%rax,%rax,4), %eax
+; X64-HSW-NEXT: addl %edi, %eax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_26:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: leal (%rax,%rax,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-JAG-NEXT: leal (%rax,%rax,4), %eax
+; X64-JAG-NEXT: addl %edi, %eax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_26:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $26, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_26:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_26:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_26:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $26, %edi, %eax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_26:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imull $26, %edi, %eax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_26:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imull $26, %edi, %eax
+; X64-SLM-NEXT: retq
%mul = mul nsw i32 %x, 26
ret i32 %mul
}
@@ -1361,46 +835,22 @@ define i32 @test_mul_by_27(i32 %x) {
; X86-NEXT: leal (%eax,%eax,2), %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_27:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_27:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_27:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-NEXT: leal (%rax,%rax,2), %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_27:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $27, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_27:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_27:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_27:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: leal (%rax,%rax,2), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_27:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_27:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $27, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 27
ret i32 %mul
}
@@ -1417,43 +867,33 @@ define i32 @test_mul_by_28(i32 %x) {
; X64-HSW-LABEL: test_mul_by_28:
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: addl %edi, %eax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_28:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax
+; X64-JAG-NEXT: addl %edi, %eax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_28:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $28, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_28:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_28:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_28:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $28, %edi, %eax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_28:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imull $28, %edi, %eax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_28:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imull $28, %edi, %eax
+; X64-SLM-NEXT: retq
%mul = mul nsw i32 %x, 28
ret i32 %mul
}
@@ -1471,45 +911,35 @@ define i32 @test_mul_by_29(i32 %x) {
; X64-HSW-LABEL: test_mul_by_29:
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-HSW-NEXT: leal (%rax,%rax,2), %eax
+; X64-HSW-NEXT: addl %edi, %eax
+; X64-HSW-NEXT: addl %edi, %eax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_29:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-JAG-NEXT: leal (%rax,%rax,2), %eax
+; X64-JAG-NEXT: addl %edi, %eax
+; X64-JAG-NEXT: addl %edi, %eax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_29:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $29, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_29:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_29:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_29:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $29, %edi, %eax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_29:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imull $29, %edi, %eax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_29:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imull $29, %edi, %eax
+; X64-SLM-NEXT: retq
%mul = mul nsw i32 %x, 29
ret i32 %mul
}
@@ -1524,49 +954,23 @@ define i32 @test_mul_by_30(i32 %x) {
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_30:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50]
-; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_30:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_30:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shll $5, %eax
+; X64-NEXT: subl %edi, %eax
+; X64-NEXT: subl %edi, %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_30:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $30, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_30:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_30:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_30:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: shll $5, %eax # sched: [1:1.00]
-; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_30:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_30:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $30, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 30
ret i32 %mul
}
@@ -1580,46 +984,22 @@ define i32 @test_mul_by_31(i32 %x) {
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_31:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50]
-; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_31:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_31:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shll $5, %eax
+; X64-NEXT: subl %edi, %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_31:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $31, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_31:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_31:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_31:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: shll $5, %eax # sched: [1:1.00]
-; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_31:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_31:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $31, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 31
ret i32 %mul
}
@@ -1631,17 +1011,11 @@ define i32 @test_mul_by_32(i32 %x) {
; X86-NEXT: shll $5, %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_32:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_32:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: shll $5, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_32:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shll $5, %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_32:
; X86-NOOPT: # %bb.0:
@@ -1649,29 +1023,11 @@ define i32 @test_mul_by_32(i32 %x) {
; X86-NOOPT-NEXT: shll $5, %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_32:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25]
-; HSW-NOOPT-NEXT: shll $5, %eax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_32:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50]
-; JAG-NOOPT-NEXT: shll $5, %eax # sched: [1:0.50]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_32:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: shll $5, %eax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_32:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.50]
-; SLM-NOOPT-NEXT: shll $5, %eax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_32:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: movl %edi, %eax
+; X64-NOOPT-NEXT: shll $5, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 32
ret i32 %mul
}
@@ -1687,41 +1043,31 @@ define i32 @test_mul_by_37(i32 %x) {
; X64-HSW-LABEL: test_mul_by_37:
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_37:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-JAG-NEXT: leal (%rdi,%rax,4), %eax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_37:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $37, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_37:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $37, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_37:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $37, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_37:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $37, %edi, %eax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_37:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imull $37, %edi, %eax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_37:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $37, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imull $37, %edi, %eax
+; X64-SLM-NEXT: retq
%mul = mul nsw i32 %x, 37
ret i32 %mul
}
@@ -1737,41 +1083,31 @@ define i32 @test_mul_by_41(i32 %x) {
; X64-HSW-LABEL: test_mul_by_41:
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rdi,%rax,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-HSW-NEXT: leal (%rdi,%rax,8), %eax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_41:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: leal (%rdi,%rax,8), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-JAG-NEXT: leal (%rdi,%rax,8), %eax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_41:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $41, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_41:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $41, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_41:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $41, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_41:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $41, %edi, %eax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_41:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imull $41, %edi, %eax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_41:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $41, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imull $41, %edi, %eax
+; X64-SLM-NEXT: retq
%mul = mul nsw i32 %x, 41
ret i32 %mul
}
@@ -1786,49 +1122,23 @@ define i32 @test_mul_by_62(i32 %x) {
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_62:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: shll $6, %eax # sched: [1:0.50]
-; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_62:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: shll $6, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_62:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: shll $6, %eax
+; X64-NEXT: subl %edi, %eax
+; X64-NEXT: subl %edi, %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_62:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $62, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_62:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $62, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_62:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $62, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_62:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: shll $6, %eax # sched: [1:1.00]
-; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: subl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_62:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $62, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_62:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $62, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 62
ret i32 %mul
}
@@ -1845,47 +1155,37 @@ define i32 @test_mul_by_66(i32 %x) {
; X64-HSW-LABEL: test_mul_by_66:
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: shll $6, %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: movl %edi, %eax
+; X64-HSW-NEXT: shll $6, %eax
+; X64-HSW-NEXT: leal (%rax,%rdi,2), %eax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_66:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: shll $6, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: leal (%rax,%rdi,2), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: movl %edi, %eax
+; X64-JAG-NEXT: shll $6, %eax
+; X64-JAG-NEXT: leal (%rax,%rdi,2), %eax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_66:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $66, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_66:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $66, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_66:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $66, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_66:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $66, %edi, %eax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_66:
; X64-SLM: # %bb.0:
; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: shll $6, %eax # sched: [1:1.00]
-; X64-SLM-NEXT: leal (%rax,%rdi), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: addl %edi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_66:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $66, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: movl %edi, %eax
+; X64-SLM-NEXT: shll $6, %eax
+; X64-SLM-NEXT: leal (%rax,%rdi), %eax
+; X64-SLM-NEXT: addl %edi, %eax
+; X64-SLM-NEXT: retq
%mul = mul nsw i32 %x, 66
ret i32 %mul
}
@@ -1901,41 +1201,31 @@ define i32 @test_mul_by_73(i32 %x) {
; X64-HSW-LABEL: test_mul_by_73:
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rdi,%rax,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-HSW-NEXT: leal (%rdi,%rax,8), %eax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_73:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: leal (%rdi,%rax,8), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-JAG-NEXT: leal (%rdi,%rax,8), %eax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_73:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $73, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_73:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $73, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_73:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $73, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_73:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $73, %edi, %eax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_73:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imull $73, %edi, %eax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_73:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $73, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imull $73, %edi, %eax
+; X64-SLM-NEXT: retq
%mul = mul nsw i32 %x, 73
ret i32 %mul
}
@@ -1952,47 +1242,33 @@ define i32 @test_mul_by_520(i32 %x) {
; X64-HSW-LABEL: test_mul_by_520:
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: shll $9, %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rax,%rdi,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: movl %edi, %eax
+; X64-HSW-NEXT: shll $9, %eax
+; X64-HSW-NEXT: leal (%rax,%rdi,8), %eax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_520:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: movl %edi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: shll $9, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: leal (%rax,%rdi,8), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: movl %edi, %eax
+; X64-JAG-NEXT: shll $9, %eax
+; X64-JAG-NEXT: leal (%rax,%rdi,8), %eax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_520:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $520, {{[0-9]+}}(%esp), %eax # imm = 0x208
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_520:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208
-; HSW-NOOPT-NEXT: # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_520:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208
-; JAG-NOOPT-NEXT: # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_520:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_520:
; X64-SLM: # %bb.0:
; X64-SLM-NEXT: imull $520, %edi, %eax # imm = 0x208
-; X64-SLM-NEXT: # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_520:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $520, %edi, %eax # imm = 0x208
-; SLM-NOOPT-NEXT: # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: retq
%mul = mul nsw i32 %x, 520
ret i32 %mul
}
@@ -2006,49 +1282,23 @@ define i32 @test_mul_by_neg10(i32 %x) {
; X86-NEXT: negl %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_neg10:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25]
-; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: negl %eax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_neg10:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: addl %edi, %edi # sched: [1:0.50]
-; X64-JAG-NEXT: leal (%rdi,%rdi,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: negl %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_neg10:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: addl %edi, %edi
+; X64-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_neg10:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $-10, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_neg10:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $-10, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_neg10:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $-10, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_neg10:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: addl %edi, %edi # sched: [1:0.50]
-; X64-SLM-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: negl %eax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_neg10:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $-10, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_neg10:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $-10, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, -10
ret i32 %mul
}
@@ -2062,49 +1312,23 @@ define i32 @test_mul_by_neg36(i32 %x) {
; X86-NEXT: negl %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_neg36:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: negl %eax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_neg36:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: shll $2, %edi # sched: [1:0.50]
-; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: negl %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-LABEL: test_mul_by_neg36:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: shll $2, %edi
+; X64-NEXT: leal (%rdi,%rdi,8), %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_neg36:
; X86-NOOPT: # %bb.0:
; X86-NOOPT-NEXT: imull $-36, {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_neg36:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $-36, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_neg36:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $-36, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_neg36:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: shll $2, %edi # sched: [1:1.00]
-; X64-SLM-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: negl %eax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_neg36:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $-36, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_neg36:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $-36, %edi, %eax
+; X64-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, -36
ret i32 %mul
}
@@ -2122,20 +1346,20 @@ define i32 @test_mul_spec(i32 %x) nounwind {
; X64-HSW-LABEL: test_mul_spec:
; X64-HSW: # %bb.0:
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50]
-; X64-HSW-NEXT: addl $42, %ecx # sched: [1:0.25]
-; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: addl $2, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: imull %ecx, %eax # sched: [3:1.00]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %ecx
+; X64-HSW-NEXT: addl $42, %ecx
+; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax
+; X64-HSW-NEXT: addl $2, %eax
+; X64-HSW-NEXT: imull %ecx, %eax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_spec:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [2:1.00]
-; X64-JAG-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [2:1.00]
-; X64-JAG-NEXT: imull %ecx, %eax # sched: [3:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leal 42(%rdi,%rdi,8), %ecx
+; X64-JAG-NEXT: leal 2(%rdi,%rdi,4), %eax
+; X64-JAG-NEXT: imull %ecx, %eax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_spec:
; X86-NOOPT: # %bb.0:
@@ -2148,36 +1372,36 @@ define i32 @test_mul_spec(i32 %x) nounwind {
; HSW-NOOPT-LABEL: test_mul_spec:
; HSW-NOOPT: # %bb.0:
; HSW-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50]
-; HSW-NOOPT-NEXT: addl $42, %ecx # sched: [1:0.25]
-; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: addl $2, %eax # sched: [1:0.25]
-; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %ecx
+; HSW-NOOPT-NEXT: addl $42, %ecx
+; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax
+; HSW-NOOPT-NEXT: addl $2, %eax
+; HSW-NOOPT-NEXT: imull %ecx, %eax
+; HSW-NOOPT-NEXT: retq
;
; JAG-NOOPT-LABEL: test_mul_spec:
; JAG-NOOPT: # %bb.0:
; JAG-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; JAG-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [2:1.00]
-; JAG-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [2:1.00]
-; JAG-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; JAG-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx
+; JAG-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax
+; JAG-NOOPT-NEXT: imull %ecx, %eax
+; JAG-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_spec:
; X64-SLM: # %bb.0:
; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00]
-; X64-SLM-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00]
-; X64-SLM-NEXT: imull %ecx, %eax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: leal 42(%rdi,%rdi,8), %ecx
+; X64-SLM-NEXT: leal 2(%rdi,%rdi,4), %eax
+; X64-SLM-NEXT: imull %ecx, %eax
+; X64-SLM-NEXT: retq
;
; SLM-NOOPT-LABEL: test_mul_spec:
; SLM-NOOPT: # %bb.0:
; SLM-NOOPT-NEXT: # kill: def $edi killed $edi def $rdi
-; SLM-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx # sched: [1:1.00]
-; SLM-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; SLM-NOOPT-NEXT: leal 42(%rdi,%rdi,8), %ecx
+; SLM-NOOPT-NEXT: leal 2(%rdi,%rdi,4), %eax
+; SLM-NOOPT-NEXT: imull %ecx, %eax
+; SLM-NOOPT-NEXT: retq
%mul = mul nsw i32 %x, 9
%add = add nsw i32 %mul, 42
%mul2 = mul nsw i32 %x, 5
@@ -2200,19 +1424,19 @@ define i32 @mul_neg_fold(i32 %a, i32 %b) {
;
; X64-HSW-LABEL: mul_neg_fold:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movl %esi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: movl %esi, %eax
; X64-HSW-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-HSW-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:0.50]
-; X64-HSW-NEXT: subl %ecx, %eax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leal (%rdi,%rdi,8), %ecx
+; X64-HSW-NEXT: subl %ecx, %eax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: mul_neg_fold:
; X64-JAG: # %bb.0:
; X64-JAG-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-JAG-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [2:1.00]
-; X64-JAG-NEXT: movl %esi, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: subl %ecx, %eax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leal (%rdi,%rdi,8), %ecx
+; X64-JAG-NEXT: movl %esi, %eax
+; X64-JAG-NEXT: subl %ecx, %eax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: mul_neg_fold:
; X86-NOOPT: # %bb.0:
@@ -2220,31 +1444,19 @@ define i32 @mul_neg_fold(i32 %a, i32 %b) {
; X86-NOOPT-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: mul_neg_fold:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imull $-9, %edi, %eax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: addl %esi, %eax # sched: [1:0.25]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: mul_neg_fold:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imull $-9, %edi, %eax # sched: [3:1.00]
-; JAG-NOOPT-NEXT: addl %esi, %eax # sched: [1:0.50]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: mul_neg_fold:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imull $-9, %edi, %eax
+; X64-NOOPT-NEXT: addl %esi, %eax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: mul_neg_fold:
; X64-SLM: # %bb.0:
; X64-SLM-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-SLM-NEXT: movl %esi, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: leal (%rdi,%rdi,8), %ecx # sched: [1:1.00]
-; X64-SLM-NEXT: subl %ecx, %eax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: mul_neg_fold:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imull $-9, %edi, %eax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: addl %esi, %eax # sched: [1:0.50]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: movl %esi, %eax
+; X64-SLM-NEXT: leal (%rdi,%rdi,8), %ecx
+; X64-SLM-NEXT: subl %ecx, %eax
+; X64-SLM-NEXT: retq
%c = mul i32 %a, -9
%d = add i32 %b, %c
ret i32 %d
diff --git a/llvm/test/CodeGen/X86/mul-constant-i64.ll b/llvm/test/CodeGen/X86/mul-constant-i64.ll
index 0e4680becaf..9ca2e695a27 100644
--- a/llvm/test/CodeGen/X86/mul-constant-i64.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-i64.ll
@@ -1,12 +1,12 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=X64-HSW
-; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=X64-JAG
-; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT
-; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=haswell| FileCheck %s --check-prefix=HSW-NOOPT
-; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=btver2| FileCheck %s --check-prefix=JAG-NOOPT
-; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=X64-SLM
-; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -print-schedule=true -mcpu=slm| FileCheck %s --check-prefix=SLM-NOOPT
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell | FileCheck %s --check-prefixes=X64-OPT,X64-HSW
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=btver2 | FileCheck %s --check-prefixes=X64-OPT,X64-JAG
+; RUN: llc < %s -mtriple=i686-unknown -mul-constant-optimization=false | FileCheck %s --check-prefix=X86-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -mcpu=haswell | FileCheck %s --check-prefixes=X64-NOOPT,HSW-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -mcpu=btver2 | FileCheck %s --check-prefixes=X64-NOOPT,JAG-NOOPT
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=slm | FileCheck %s --check-prefixes=X64-OPT,X64-SLM
+; RUN: llc < %s -mtriple=x86_64-unknown -mul-constant-optimization=false -mcpu=slm | FileCheck %s --check-prefixes=X64-NOOPT,SLM-NOOPT
define i64 @test_mul_by_1(i64 %x) nounwind {
; X86-LABEL: test_mul_by_1:
@@ -15,15 +15,10 @@ define i64 @test_mul_by_1(i64 %x) nounwind {
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_1:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_1:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_1:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: movq %rdi, %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_1:
; X86-NOOPT: # %bb.0:
@@ -31,25 +26,14 @@ define i64 @test_mul_by_1(i64 %x) nounwind {
; X86-NOOPT-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_1:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_1:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_1:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_1:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_1:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: movq %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_1:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: movq %rdi, %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 1
ret i64 %mul
}
@@ -63,15 +47,10 @@ define i64 @test_mul_by_2(i64 %x) {
; X86-NEXT: addl %eax, %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_2:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_2:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_2:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: leaq (%rdi,%rdi), %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_2:
; X86-NOOPT: # %bb.0:
@@ -81,25 +60,14 @@ define i64 @test_mul_by_2(i64 %x) {
; X86-NOOPT-NEXT: addl %eax, %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_2:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_2:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_2:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_2:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_2:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: leaq (%rdi,%rdi), %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_2:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 2
ret i64 %mul
}
@@ -114,15 +82,10 @@ define i64 @test_mul_by_3(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_3:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_3:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_3:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: leaq (%rdi,%rdi,2), %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_3:
; X86-NOOPT: # %bb.0:
@@ -132,25 +95,14 @@ define i64 @test_mul_by_3(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_3:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_3:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_3:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_3:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_3:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_3:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,2), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 3
ret i64 %mul
}
@@ -164,15 +116,10 @@ define i64 @test_mul_by_4(i64 %x) {
; X86-NEXT: shll $2, %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_4:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_4:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (,%rdi,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_4:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: leaq (,%rdi,4), %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_4:
; X86-NOOPT: # %bb.0:
@@ -182,25 +129,14 @@ define i64 @test_mul_by_4(i64 %x) {
; X86-NOOPT-NEXT: shll $2, %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_4:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_4:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [2:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_4:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: leaq (,%rdi,4), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_4:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_4:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: leaq (,%rdi,4), %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_4:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (,%rdi,4), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 4
ret i64 %mul
}
@@ -215,15 +151,10 @@ define i64 @test_mul_by_5(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_5:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_5:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_5:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_5:
; X86-NOOPT: # %bb.0:
@@ -233,25 +164,14 @@ define i64 @test_mul_by_5(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_5:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_5:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_5:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_5:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_5:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_5:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 5
ret i64 %mul
}
@@ -266,17 +186,11 @@ define i64 @test_mul_by_6(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,2), %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_6:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
-; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_6:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50]
-; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_6:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: addq %rdi, %rdi
+; X64-OPT-NEXT: leaq (%rdi,%rdi,2), %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_6:
; X86-NOOPT: # %bb.0:
@@ -286,26 +200,15 @@ define i64 @test_mul_by_6(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_6:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_6:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_6:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50]
-; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_6:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_6:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $6, %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_6:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: addq %rdi, %rdi
+; X86-OPT-NEXT: leaq (%rdi,%rdi,2), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 6
ret i64 %mul
}
@@ -321,17 +224,11 @@ define i64 @test_mul_by_7(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_7:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_7:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (,%rdi,8), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_7:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: leaq (,%rdi,8), %rax
+; X64-OPT-NEXT: subq %rdi, %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_7:
; X86-NOOPT: # %bb.0:
@@ -341,26 +238,15 @@ define i64 @test_mul_by_7(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_7:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_7:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_7:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_7:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_7:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $7, %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_7:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (,%rdi,8), %rax
+; X86-OPT-NEXT: subq %rdi, %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 7
ret i64 %mul
}
@@ -374,15 +260,10 @@ define i64 @test_mul_by_8(i64 %x) {
; X86-NEXT: shll $3, %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_8:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_8:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (,%rdi,8), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_8:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: leaq (,%rdi,8), %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_8:
; X86-NOOPT: # %bb.0:
@@ -392,25 +273,14 @@ define i64 @test_mul_by_8(i64 %x) {
; X86-NOOPT-NEXT: shll $3, %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_8:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_8:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [2:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_8:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_8:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_8:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: leaq (,%rdi,8), %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_8:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (,%rdi,8), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 8
ret i64 %mul
}
@@ -425,15 +295,10 @@ define i64 @test_mul_by_9(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_9:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_9:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_9:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: leaq (%rdi,%rdi,8), %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_9:
; X86-NOOPT: # %bb.0:
@@ -443,25 +308,14 @@ define i64 @test_mul_by_9(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_9:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_9:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_9:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_9:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_9:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_9:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,8), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 9
ret i64 %mul
}
@@ -476,17 +330,11 @@ define i64 @test_mul_by_10(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,2), %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_10:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
-; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_10:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50]
-; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_10:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: addq %rdi, %rdi
+; X64-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_10:
; X86-NOOPT: # %bb.0:
@@ -496,26 +344,15 @@ define i64 @test_mul_by_10(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_10:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_10:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_10:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50]
-; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_10:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_10:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $10, %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_10:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: addq %rdi, %rdi
+; X86-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 10
ret i64 %mul
}
@@ -533,15 +370,15 @@ define i64 @test_mul_by_11(i64 %x) {
;
; X64-HSW-LABEL: test_mul_by_11:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_11:
; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: leaq (%rdi,%rax,2), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-JAG-NEXT: leaq (%rdi,%rax,2), %rax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_11:
; X86-NOOPT: # %bb.0:
@@ -551,25 +388,20 @@ define i64 @test_mul_by_11(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_11:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_11:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_11:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $11, %rdi, %rax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_11:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_11:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imulq $11, %rdi, %rax
+; X64-SLM-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_11:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X86-OPT-NEXT: leaq (%rdi,%rax,2), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 11
ret i64 %mul
}
@@ -584,17 +416,11 @@ define i64 @test_mul_by_12(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,4), %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_12:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_12:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50]
-; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_12:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: shlq $2, %rdi
+; X64-OPT-NEXT: leaq (%rdi,%rdi,2), %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_12:
; X86-NOOPT: # %bb.0:
@@ -604,26 +430,15 @@ define i64 @test_mul_by_12(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_12:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_12:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_12:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00]
-; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_12:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_12:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $12, %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_12:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: shlq $2, %rdi
+; X86-OPT-NEXT: leaq (%rdi,%rdi,2), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 12
ret i64 %mul
}
@@ -641,15 +456,15 @@ define i64 @test_mul_by_13(i64 %x) {
;
; X64-HSW-LABEL: test_mul_by_13:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax
+; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_13:
; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax
+; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_13:
; X86-NOOPT: # %bb.0:
@@ -659,25 +474,20 @@ define i64 @test_mul_by_13(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_13:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_13:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_13:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $13, %rdi, %rax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_13:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_13:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imulq $13, %rdi, %rax
+; X64-SLM-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_13:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,2), %rax
+; X86-OPT-NEXT: leaq (%rdi,%rax,4), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 13
ret i64 %mul
}
@@ -695,21 +505,13 @@ define i64 @test_mul_by_14(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_14:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50]
-; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_14:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: shlq $4, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_14:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: movq %rdi, %rax
+; X64-OPT-NEXT: shlq $4, %rax
+; X64-OPT-NEXT: subq %rdi, %rax
+; X64-OPT-NEXT: subq %rdi, %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_14:
; X86-NOOPT: # %bb.0:
@@ -719,28 +521,17 @@ define i64 @test_mul_by_14(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_14:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_14:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_14:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: shlq $4, %rax # sched: [1:1.00]
-; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_14:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_14:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $14, %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_14:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: movq %rdi, %rax
+; X86-OPT-NEXT: shlq $4, %rax
+; X86-OPT-NEXT: subq %rdi, %rax
+; X86-OPT-NEXT: subq %rdi, %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 14
ret i64 %mul
}
@@ -756,17 +547,11 @@ define i64 @test_mul_by_15(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_15:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_15:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_15:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-OPT-NEXT: leaq (%rax,%rax,2), %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_15:
; X86-NOOPT: # %bb.0:
@@ -776,26 +561,15 @@ define i64 @test_mul_by_15(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_15:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_15:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_15:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_15:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_15:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $15, %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_15:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X86-OPT-NEXT: leaq (%rax,%rax,2), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 15
ret i64 %mul
}
@@ -809,17 +583,11 @@ define i64 @test_mul_by_16(i64 %x) {
; X86-NEXT: shll $4, %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_16:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_16:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: shlq $4, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_16:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: movq %rdi, %rax
+; X64-OPT-NEXT: shlq $4, %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_16:
; X86-NOOPT: # %bb.0:
@@ -829,29 +597,16 @@ define i64 @test_mul_by_16(i64 %x) {
; X86-NOOPT-NEXT: shll $4, %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_16:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; HSW-NOOPT-NEXT: shlq $4, %rax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_16:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; JAG-NOOPT-NEXT: shlq $4, %rax # sched: [1:0.50]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_16:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: shlq $4, %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_16:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; SLM-NOOPT-NEXT: shlq $4, %rax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_16:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: movq %rdi, %rax
+; X64-NOOPT-NEXT: shlq $4, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_16:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: movq %rdi, %rax
+; X86-OPT-NEXT: shlq $4, %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 16
ret i64 %mul
}
@@ -870,17 +625,17 @@ define i64 @test_mul_by_17(i64 %x) {
;
; X64-HSW-LABEL: test_mul_by_17:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: movq %rdi, %rax
+; X64-HSW-NEXT: shlq $4, %rax
+; X64-HSW-NEXT: leaq (%rax,%rdi), %rax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_17:
; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: shlq $4, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: movq %rdi, %rax
+; X64-JAG-NEXT: shlq $4, %rax
+; X64-JAG-NEXT: leaq (%rax,%rdi), %rax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_17:
; X86-NOOPT: # %bb.0:
@@ -890,27 +645,23 @@ define i64 @test_mul_by_17(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_17:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_17:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_17:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $17, %rdi, %rax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_17:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: shlq $4, %rax # sched: [1:1.00]
-; X64-SLM-NEXT: addq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_17:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: movq %rdi, %rax
+; X64-SLM-NEXT: shlq $4, %rax
+; X64-SLM-NEXT: addq %rdi, %rax
+; X64-SLM-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_17:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: movq %rdi, %rax
+; X86-OPT-NEXT: shlq $4, %rax
+; X86-OPT-NEXT: leaq (%rax,%rdi), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 17
ret i64 %mul
}
@@ -925,17 +676,11 @@ define i64 @test_mul_by_18(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,2), %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_18:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
-; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_18:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50]
-; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_18:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: addq %rdi, %rdi
+; X64-OPT-NEXT: leaq (%rdi,%rdi,8), %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_18:
; X86-NOOPT: # %bb.0:
@@ -945,26 +690,15 @@ define i64 @test_mul_by_18(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_18:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_18:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_18:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50]
-; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_18:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_18:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $18, %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_18:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: addq %rdi, %rdi
+; X86-OPT-NEXT: leaq (%rdi,%rdi,8), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 18
ret i64 %mul
}
@@ -982,15 +716,15 @@ define i64 @test_mul_by_19(i64 %x) {
;
; X64-HSW-LABEL: test_mul_by_19:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax
+; X64-HSW-NEXT: leaq (%rdi,%rax,2), %rax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_19:
; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: leaq (%rdi,%rax,2), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax
+; X64-JAG-NEXT: leaq (%rdi,%rax,2), %rax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_19:
; X86-NOOPT: # %bb.0:
@@ -1000,25 +734,20 @@ define i64 @test_mul_by_19(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_19:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_19:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_19:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $19, %rdi, %rax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_19:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_19:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imulq $19, %rdi, %rax
+; X64-SLM-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_19:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,8), %rax
+; X86-OPT-NEXT: leaq (%rdi,%rax,2), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 19
ret i64 %mul
}
@@ -1033,17 +762,11 @@ define i64 @test_mul_by_20(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,4), %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_20:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_20:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50]
-; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_20:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: shlq $2, %rdi
+; X64-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_20:
; X86-NOOPT: # %bb.0:
@@ -1053,26 +776,15 @@ define i64 @test_mul_by_20(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_20:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_20:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_20:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00]
-; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_20:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_20:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $20, %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_20:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: shlq $2, %rdi
+; X86-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 20
ret i64 %mul
}
@@ -1090,15 +802,15 @@ define i64 @test_mul_by_21(i64 %x) {
;
; X64-HSW-LABEL: test_mul_by_21:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_21:
; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_21:
; X86-NOOPT: # %bb.0:
@@ -1108,25 +820,20 @@ define i64 @test_mul_by_21(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_21:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_21:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_21:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $21, %rdi, %rax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_21:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_21:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imulq $21, %rdi, %rax
+; X64-SLM-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_21:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X86-OPT-NEXT: leaq (%rdi,%rax,4), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 21
ret i64 %mul
}
@@ -1145,17 +852,17 @@ define i64 @test_mul_by_22(i64 %x) {
;
; X64-HSW-LABEL: test_mul_by_22:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax
+; X64-HSW-NEXT: addq %rdi, %rax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_22:
; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax
+; X64-JAG-NEXT: addq %rdi, %rax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_22:
; X86-NOOPT: # %bb.0:
@@ -1165,25 +872,21 @@ define i64 @test_mul_by_22(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_22:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_22:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_22:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $22, %rdi, %rax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_22:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_22:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imulq $22, %rdi, %rax
+; X64-SLM-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_22:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X86-OPT-NEXT: leaq (%rdi,%rax,4), %rax
+; X86-OPT-NEXT: addq %rdi, %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 22
ret i64 %mul
}
@@ -1202,17 +905,17 @@ define i64 @test_mul_by_23(i64 %x) {
;
; X64-HSW-LABEL: test_mul_by_23:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: shlq $3, %rax # sched: [1:0.50]
-; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax
+; X64-HSW-NEXT: shlq $3, %rax
+; X64-HSW-NEXT: subq %rdi, %rax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_23:
; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: shlq $3, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax
+; X64-JAG-NEXT: shlq $3, %rax
+; X64-JAG-NEXT: subq %rdi, %rax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_23:
; X86-NOOPT: # %bb.0:
@@ -1222,25 +925,21 @@ define i64 @test_mul_by_23(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_23:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_23:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_23:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $23, %rdi, %rax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_23:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_23:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imulq $23, %rdi, %rax
+; X64-SLM-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_23:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,2), %rax
+; X86-OPT-NEXT: shlq $3, %rax
+; X86-OPT-NEXT: subq %rdi, %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 23
ret i64 %mul
}
@@ -1255,17 +954,11 @@ define i64 @test_mul_by_24(i64 %x) {
; X86-NEXT: leal (%edx,%ecx,8), %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_24:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: shlq $3, %rdi # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_24:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: shlq $3, %rdi # sched: [1:0.50]
-; X64-JAG-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_24:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: shlq $3, %rdi
+; X64-OPT-NEXT: leaq (%rdi,%rdi,2), %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_24:
; X86-NOOPT: # %bb.0:
@@ -1275,26 +968,15 @@ define i64 @test_mul_by_24(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_24:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_24:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_24:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: shlq $3, %rdi # sched: [1:1.00]
-; X64-SLM-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_24:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_24:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $24, %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_24:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: shlq $3, %rdi
+; X86-OPT-NEXT: leaq (%rdi,%rdi,2), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 24
ret i64 %mul
}
@@ -1310,17 +992,11 @@ define i64 @test_mul_by_25(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_25:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_25:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: leaq (%rax,%rax,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_25:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-OPT-NEXT: leaq (%rax,%rax,4), %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_25:
; X86-NOOPT: # %bb.0:
@@ -1330,26 +1006,15 @@ define i64 @test_mul_by_25(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_25:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_25:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_25:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_25:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_25:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $25, %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_25:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X86-OPT-NEXT: leaq (%rax,%rax,4), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 25
ret i64 %mul
}
@@ -1368,17 +1033,17 @@ define i64 @test_mul_by_26(i64 %x) {
;
; X64-HSW-LABEL: test_mul_by_26:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax
+; X64-HSW-NEXT: addq %rdi, %rax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_26:
; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: leaq (%rax,%rax,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-JAG-NEXT: leaq (%rax,%rax,4), %rax
+; X64-JAG-NEXT: addq %rdi, %rax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_26:
; X86-NOOPT: # %bb.0:
@@ -1388,25 +1053,21 @@ define i64 @test_mul_by_26(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_26:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_26:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_26:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $26, %rdi, %rax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_26:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_26:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imulq $26, %rdi, %rax
+; X64-SLM-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_26:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X86-OPT-NEXT: leaq (%rax,%rax,4), %rax
+; X86-OPT-NEXT: addq %rdi, %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 26
ret i64 %mul
}
@@ -1422,17 +1083,11 @@ define i64 @test_mul_by_27(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_27:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_27:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_27:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: leaq (%rdi,%rdi,8), %rax
+; X64-OPT-NEXT: leaq (%rax,%rax,2), %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_27:
; X86-NOOPT: # %bb.0:
@@ -1442,26 +1097,15 @@ define i64 @test_mul_by_27(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_27:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_27:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_27:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_27:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_27:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $27, %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_27:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,8), %rax
+; X86-OPT-NEXT: leaq (%rax,%rax,2), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 27
ret i64 %mul
}
@@ -1480,17 +1124,17 @@ define i64 @test_mul_by_28(i64 %x) {
;
; X64-HSW-LABEL: test_mul_by_28:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax
+; X64-HSW-NEXT: addq %rdi, %rax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_28:
; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax
+; X64-JAG-NEXT: addq %rdi, %rax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_28:
; X86-NOOPT: # %bb.0:
@@ -1500,25 +1144,21 @@ define i64 @test_mul_by_28(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_28:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_28:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_28:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $28, %rdi, %rax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_28:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_28:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imulq $28, %rdi, %rax
+; X64-SLM-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_28:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,8), %rax
+; X86-OPT-NEXT: leaq (%rax,%rax,2), %rax
+; X86-OPT-NEXT: addq %rdi, %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 28
ret i64 %mul
}
@@ -1538,19 +1178,19 @@ define i64 @test_mul_by_29(i64 %x) {
;
; X64-HSW-LABEL: test_mul_by_29:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax
+; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax
+; X64-HSW-NEXT: addq %rdi, %rax
+; X64-HSW-NEXT: addq %rdi, %rax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_29:
; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax
+; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax
+; X64-JAG-NEXT: addq %rdi, %rax
+; X64-JAG-NEXT: addq %rdi, %rax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_29:
; X86-NOOPT: # %bb.0:
@@ -1560,25 +1200,22 @@ define i64 @test_mul_by_29(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_29:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_29:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_29:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $29, %rdi, %rax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_29:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_29:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imulq $29, %rdi, %rax
+; X64-SLM-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_29:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,8), %rax
+; X86-OPT-NEXT: leaq (%rax,%rax,2), %rax
+; X86-OPT-NEXT: addq %rdi, %rax
+; X86-OPT-NEXT: addq %rdi, %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 29
ret i64 %mul
}
@@ -1596,21 +1233,13 @@ define i64 @test_mul_by_30(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_30:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
-; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_30:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_30:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: movq %rdi, %rax
+; X64-OPT-NEXT: shlq $5, %rax
+; X64-OPT-NEXT: subq %rdi, %rax
+; X64-OPT-NEXT: subq %rdi, %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_30:
; X86-NOOPT: # %bb.0:
@@ -1620,28 +1249,17 @@ define i64 @test_mul_by_30(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_30:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_30:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_30:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: shlq $5, %rax # sched: [1:1.00]
-; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_30:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_30:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $30, %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_30:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: movq %rdi, %rax
+; X86-OPT-NEXT: shlq $5, %rax
+; X86-OPT-NEXT: subq %rdi, %rax
+; X86-OPT-NEXT: subq %rdi, %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 30
ret i64 %mul
}
@@ -1658,19 +1276,12 @@ define i64 @test_mul_by_31(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_31:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
-; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_31:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_31:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: movq %rdi, %rax
+; X64-OPT-NEXT: shlq $5, %rax
+; X64-OPT-NEXT: subq %rdi, %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_31:
; X86-NOOPT: # %bb.0:
@@ -1680,27 +1291,16 @@ define i64 @test_mul_by_31(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_31:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_31:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_31:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: shlq $5, %rax # sched: [1:1.00]
-; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_31:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_31:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $31, %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_31:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: movq %rdi, %rax
+; X86-OPT-NEXT: shlq $5, %rax
+; X86-OPT-NEXT: subq %rdi, %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 31
ret i64 %mul
}
@@ -1714,17 +1314,11 @@ define i64 @test_mul_by_32(i64 %x) {
; X86-NEXT: shll $5, %eax
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_32:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_32:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: shlq $5, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_32:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: movq %rdi, %rax
+; X64-OPT-NEXT: shlq $5, %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_32:
; X86-NOOPT: # %bb.0:
@@ -1734,29 +1328,16 @@ define i64 @test_mul_by_32(i64 %x) {
; X86-NOOPT-NEXT: shll $5, %eax
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_32:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; HSW-NOOPT-NEXT: shlq $5, %rax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_32:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; JAG-NOOPT-NEXT: shlq $5, %rax # sched: [1:0.50]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_32:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: shlq $5, %rax # sched: [1:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_32:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; SLM-NOOPT-NEXT: shlq $5, %rax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_32:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: movq %rdi, %rax
+; X64-NOOPT-NEXT: shlq $5, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_32:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: movq %rdi, %rax
+; X86-OPT-NEXT: shlq $5, %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 32
ret i64 %mul
}
@@ -1774,15 +1355,15 @@ define i64 @test_mul_by_37(i64 %x) {
;
; X64-HSW-LABEL: test_mul_by_37:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax
+; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_37:
; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax
+; X64-JAG-NEXT: leaq (%rdi,%rax,4), %rax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_37:
; X86-NOOPT: # %bb.0:
@@ -1792,25 +1373,20 @@ define i64 @test_mul_by_37(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_37:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $37, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_37:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $37, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_37:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $37, %rdi, %rax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_37:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imulq $37, %rdi, %rax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_37:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $37, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imulq $37, %rdi, %rax
+; X64-SLM-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_37:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,8), %rax
+; X86-OPT-NEXT: leaq (%rdi,%rax,4), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 37
ret i64 %mul
}
@@ -1828,15 +1404,15 @@ define i64 @test_mul_by_41(i64 %x) {
;
; X64-HSW-LABEL: test_mul_by_41:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rdi,%rax,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-HSW-NEXT: leaq (%rdi,%rax,8), %rax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_41:
; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: leaq (%rdi,%rax,8), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-JAG-NEXT: leaq (%rdi,%rax,8), %rax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_41:
; X86-NOOPT: # %bb.0:
@@ -1846,25 +1422,20 @@ define i64 @test_mul_by_41(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_41:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $41, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_41:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $41, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_41:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $41, %rdi, %rax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_41:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imulq $41, %rdi, %rax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_41:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $41, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imulq $41, %rdi, %rax
+; X64-SLM-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_41:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X86-OPT-NEXT: leaq (%rdi,%rax,8), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 41
ret i64 %mul
}
@@ -1882,21 +1453,13 @@ define i64 @test_mul_by_62(i64 %x) {
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_62:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: shlq $6, %rax # sched: [1:0.50]
-; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_62:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: shlq $6, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_62:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: movq %rdi, %rax
+; X64-OPT-NEXT: shlq $6, %rax
+; X64-OPT-NEXT: subq %rdi, %rax
+; X64-OPT-NEXT: subq %rdi, %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_62:
; X86-NOOPT: # %bb.0:
@@ -1906,28 +1469,17 @@ define i64 @test_mul_by_62(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_62:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $62, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_62:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $62, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_62:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: shlq $6, %rax # sched: [1:1.00]
-; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: subq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_62:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $62, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_62:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $62, %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_62:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: movq %rdi, %rax
+; X86-OPT-NEXT: shlq $6, %rax
+; X86-OPT-NEXT: subq %rdi, %rax
+; X86-OPT-NEXT: subq %rdi, %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 62
ret i64 %mul
}
@@ -1946,17 +1498,17 @@ define i64 @test_mul_by_66(i64 %x) {
;
; X64-HSW-LABEL: test_mul_by_66:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: shlq $6, %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: movq %rdi, %rax
+; X64-HSW-NEXT: shlq $6, %rax
+; X64-HSW-NEXT: leaq (%rax,%rdi,2), %rax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_66:
; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: shlq $6, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: leaq (%rax,%rdi,2), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: movq %rdi, %rax
+; X64-JAG-NEXT: shlq $6, %rax
+; X64-JAG-NEXT: leaq (%rax,%rdi,2), %rax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_66:
; X86-NOOPT: # %bb.0:
@@ -1966,28 +1518,24 @@ define i64 @test_mul_by_66(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_66:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $66, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_66:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $66, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_66:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $66, %rdi, %rax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_66:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: shlq $6, %rax # sched: [1:1.00]
-; X64-SLM-NEXT: addq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: addq %rdi, %rax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_66:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $66, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: movq %rdi, %rax
+; X64-SLM-NEXT: shlq $6, %rax
+; X64-SLM-NEXT: addq %rdi, %rax
+; X64-SLM-NEXT: addq %rdi, %rax
+; X64-SLM-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_66:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: movq %rdi, %rax
+; X86-OPT-NEXT: shlq $6, %rax
+; X86-OPT-NEXT: leaq (%rax,%rdi,2), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 66
ret i64 %mul
}
@@ -2005,15 +1553,15 @@ define i64 @test_mul_by_73(i64 %x) {
;
; X64-HSW-LABEL: test_mul_by_73:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rdi,%rax,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax
+; X64-HSW-NEXT: leaq (%rdi,%rax,8), %rax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_73:
; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: leaq (%rdi,%rax,8), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax
+; X64-JAG-NEXT: leaq (%rdi,%rax,8), %rax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_73:
; X86-NOOPT: # %bb.0:
@@ -2023,25 +1571,20 @@ define i64 @test_mul_by_73(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_73:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $73, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_73:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $73, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_73:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $73, %rdi, %rax
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_73:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: imulq $73, %rdi, %rax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_73:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $73, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: imulq $73, %rdi, %rax
+; X64-SLM-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_73:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq (%rdi,%rdi,8), %rax
+; X86-OPT-NEXT: leaq (%rdi,%rax,8), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 73
ret i64 %mul
}
@@ -2060,17 +1603,17 @@ define i64 @test_mul_by_520(i64 %x) {
;
; X64-HSW-LABEL: test_mul_by_520:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: shlq $9, %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rax,%rdi,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: movq %rdi, %rax
+; X64-HSW-NEXT: shlq $9, %rax
+; X64-HSW-NEXT: leaq (%rax,%rdi,8), %rax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_by_520:
; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: shlq $9, %rax # sched: [1:0.50]
-; X64-JAG-NEXT: leaq (%rax,%rdi,8), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: movq %rdi, %rax
+; X64-JAG-NEXT: shlq $9, %rax
+; X64-JAG-NEXT: leaq (%rax,%rdi,8), %rax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_520:
; X86-NOOPT: # %bb.0:
@@ -2080,29 +1623,21 @@ define i64 @test_mul_by_520(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_520:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208
-; HSW-NOOPT-NEXT: # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_520:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208
-; JAG-NOOPT-NEXT: # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_520:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208
+; X64-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_by_520:
; X64-SLM: # %bb.0:
; X64-SLM-NEXT: imulq $520, %rdi, %rax # imm = 0x208
-; X64-SLM-NEXT: # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_520:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $520, %rdi, %rax # imm = 0x208
-; SLM-NOOPT-NEXT: # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_520:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: movq %rdi, %rax
+; X86-OPT-NEXT: shlq $9, %rax
+; X86-OPT-NEXT: leaq (%rax,%rdi,8), %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 520
ret i64 %mul
}
@@ -2126,19 +1661,12 @@ define i64 @test_mul_by_neg10(i64 %x) {
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_neg10:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25]
-; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: negq %rax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_neg10:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: addq %rdi, %rdi # sched: [1:0.50]
-; X64-JAG-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: negq %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_neg10:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: addq %rdi, %rdi
+; X64-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-OPT-NEXT: negq %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_neg10:
; X86-NOOPT: # %bb.0:
@@ -2151,27 +1679,16 @@ define i64 @test_mul_by_neg10(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_neg10:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $-10, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_neg10:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $-10, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_neg10:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: addq %rdi, %rdi # sched: [1:0.50]
-; X64-SLM-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: negq %rax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_neg10:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $-10, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_neg10:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $-10, %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_neg10:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: addq %rdi, %rdi
+; X86-OPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; X86-OPT-NEXT: negq %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, -10
ret i64 %mul
}
@@ -2195,19 +1712,12 @@ define i64 @test_mul_by_neg36(i64 %x) {
; X86-NEXT: .cfi_def_cfa_offset 4
; X86-NEXT: retl
;
-; X64-HSW-LABEL: test_mul_by_neg36:
-; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: negq %rax # sched: [1:0.25]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
-;
-; X64-JAG-LABEL: test_mul_by_neg36:
-; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: shlq $2, %rdi # sched: [1:0.50]
-; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: negq %rax # sched: [1:0.50]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-OPT-LABEL: test_mul_by_neg36:
+; X64-OPT: # %bb.0:
+; X64-OPT-NEXT: shlq $2, %rdi
+; X64-OPT-NEXT: leaq (%rdi,%rdi,8), %rax
+; X64-OPT-NEXT: negq %rax
+; X64-OPT-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_by_neg36:
; X86-NOOPT: # %bb.0:
@@ -2220,27 +1730,16 @@ define i64 @test_mul_by_neg36(i64 %x) {
; X86-NOOPT-NEXT: addl %ecx, %edx
; X86-NOOPT-NEXT: retl
;
-; HSW-NOOPT-LABEL: test_mul_by_neg36:
-; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: imulq $-36, %rdi, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
-;
-; JAG-NOOPT-LABEL: test_mul_by_neg36:
-; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: imulq $-36, %rdi, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
-;
-; X64-SLM-LABEL: test_mul_by_neg36:
-; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: shlq $2, %rdi # sched: [1:1.00]
-; X64-SLM-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: negq %rax # sched: [1:0.50]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
-;
-; SLM-NOOPT-LABEL: test_mul_by_neg36:
-; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: imulq $-36, %rdi, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; X64-NOOPT-LABEL: test_mul_by_neg36:
+; X64-NOOPT: # %bb.0:
+; X64-NOOPT-NEXT: imulq $-36, %rdi, %rax
+; X64-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_by_neg36:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: shlq $2, %rdi
+; X86-OPT-NEXT: leaq (%rdi,%rdi,8), %rax
+; X86-OPT-NEXT: negq %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, -36
ret i64 %mul
}
@@ -2281,19 +1780,19 @@ define i64 @test_mul_spec(i64 %x) nounwind {
;
; X64-HSW-LABEL: test_mul_spec:
; X64-HSW: # %bb.0:
-; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50]
-; X64-HSW-NEXT: addq $42, %rcx # sched: [1:0.25]
-; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: addq $2, %rax # sched: [1:0.25]
-; X64-HSW-NEXT: imulq %rcx, %rax # sched: [3:1.00]
-; X64-HSW-NEXT: retq # sched: [7:1.00]
+; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rcx
+; X64-HSW-NEXT: addq $42, %rcx
+; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax
+; X64-HSW-NEXT: addq $2, %rax
+; X64-HSW-NEXT: imulq %rcx, %rax
+; X64-HSW-NEXT: retq
;
; X64-JAG-LABEL: test_mul_spec:
; X64-JAG: # %bb.0:
-; X64-JAG-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [2:1.00]
-; X64-JAG-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [2:1.00]
-; X64-JAG-NEXT: imulq %rcx, %rax # sched: [6:4.00]
-; X64-JAG-NEXT: retq # sched: [4:1.00]
+; X64-JAG-NEXT: leaq 42(%rdi,%rdi,8), %rcx
+; X64-JAG-NEXT: leaq 2(%rdi,%rdi,4), %rax
+; X64-JAG-NEXT: imulq %rcx, %rax
+; X64-JAG-NEXT: retq
;
; X86-NOOPT-LABEL: test_mul_spec:
; X86-NOOPT: # %bb.0:
@@ -2329,33 +1828,39 @@ define i64 @test_mul_spec(i64 %x) nounwind {
;
; HSW-NOOPT-LABEL: test_mul_spec:
; HSW-NOOPT: # %bb.0:
-; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rcx # sched: [1:0.50]
-; HSW-NOOPT-NEXT: addq $42, %rcx # sched: [1:0.25]
-; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50]
-; HSW-NOOPT-NEXT: addq $2, %rax # sched: [1:0.25]
-; HSW-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
-; HSW-NOOPT-NEXT: retq # sched: [7:1.00]
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rcx
+; HSW-NOOPT-NEXT: addq $42, %rcx
+; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax
+; HSW-NOOPT-NEXT: addq $2, %rax
+; HSW-NOOPT-NEXT: imulq %rcx, %rax
+; HSW-NOOPT-NEXT: retq
;
; JAG-NOOPT-LABEL: test_mul_spec:
; JAG-NOOPT: # %bb.0:
-; JAG-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [2:1.00]
-; JAG-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [2:1.00]
-; JAG-NOOPT-NEXT: imulq %rcx, %rax # sched: [6:4.00]
-; JAG-NOOPT-NEXT: retq # sched: [4:1.00]
+; JAG-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx
+; JAG-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax
+; JAG-NOOPT-NEXT: imulq %rcx, %rax
+; JAG-NOOPT-NEXT: retq
;
; X64-SLM-LABEL: test_mul_spec:
; X64-SLM: # %bb.0:
-; X64-SLM-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00]
-; X64-SLM-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00]
-; X64-SLM-NEXT: imulq %rcx, %rax # sched: [3:1.00]
-; X64-SLM-NEXT: retq # sched: [4:1.00]
+; X64-SLM-NEXT: leaq 42(%rdi,%rdi,8), %rcx
+; X64-SLM-NEXT: leaq 2(%rdi,%rdi,4), %rax
+; X64-SLM-NEXT: imulq %rcx, %rax
+; X64-SLM-NEXT: retq
;
; SLM-NOOPT-LABEL: test_mul_spec:
; SLM-NOOPT: # %bb.0:
-; SLM-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx # sched: [1:1.00]
-; SLM-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax # sched: [1:1.00]
-; SLM-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00]
-; SLM-NOOPT-NEXT: retq # sched: [4:1.00]
+; SLM-NOOPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx
+; SLM-NOOPT-NEXT: leaq 2(%rdi,%rdi,4), %rax
+; SLM-NOOPT-NEXT: imulq %rcx, %rax
+; SLM-NOOPT-NEXT: retq
+; X86-OPT-LABEL: test_mul_spec:
+; X86-OPT: # %bb.0:
+; X86-OPT-NEXT: leaq 42(%rdi,%rdi,8), %rcx
+; X86-OPT-NEXT: leaq 2(%rdi,%rdi,4), %rax
+; X86-OPT-NEXT: imulq %rcx, %rax
+; X86-OPT-NEXT: retq
%mul = mul nsw i64 %x, 9
%add = add nsw i64 %mul, 42
%mul2 = mul nsw i64 %x, 5
diff --git a/llvm/test/CodeGen/X86/mwaitx-schedule.ll b/llvm/test/CodeGen/X86/mwaitx-schedule.ll
deleted file mode 100644
index ea135fdda80..00000000000
--- a/llvm/test/CodeGen/X86/mwaitx-schedule.ll
+++ /dev/null
@@ -1,65 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+mwaitx | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER4
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=ZNVER1
-
-define void @foo(i8* %P, i32 %E, i32 %H) nounwind {
-; GENERIC-LABEL: foo:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; GENERIC-NEXT: monitorx # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER4-LABEL: foo:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: movl %esi, %ecx
-; BDVER4-NEXT: leaq (%rdi), %rax
-; BDVER4-NEXT: monitorx
-; BDVER4-NEXT: retq
-;
-; ZNVER1-LABEL: foo:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25]
-; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25]
-; ZNVER1-NEXT: monitorx # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void @llvm.x86.monitorx(i8* %P, i32 %E, i32 %H)
- ret void
-}
-declare void @llvm.x86.monitorx(i8*, i32, i32) nounwind
-
-define void @bar(i32 %E, i32 %H, i32 %C) nounwind {
-; GENERIC-LABEL: bar:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pushq %rbx # sched: [5:1.00]
-; GENERIC-NEXT: movl %edx, %ebx # sched: [1:0.33]
-; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: mwaitx # sched: [100:0.33]
-; GENERIC-NEXT: popq %rbx # sched: [6:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER4-LABEL: bar:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: pushq %rbx
-; BDVER4-NEXT: movl %edx, %ebx
-; BDVER4-NEXT: movl %esi, %eax
-; BDVER4-NEXT: movl %edi, %ecx
-; BDVER4-NEXT: mwaitx
-; BDVER4-NEXT: popq %rbx
-; BDVER4-NEXT: retq
-;
-; ZNVER1-LABEL: bar:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pushq %rbx # sched: [1:0.50]
-; ZNVER1-NEXT: movl %edx, %ebx # sched: [1:0.25]
-; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25]
-; ZNVER1-NEXT: mwaitx # sched: [100:0.25]
-; ZNVER1-NEXT: popq %rbx # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void @llvm.x86.mwaitx(i32 %E, i32 %H, i32 %C)
- ret void
-}
-declare void @llvm.x86.mwaitx(i32, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/X86/popcnt-schedule.ll b/llvm/test/CodeGen/X86/popcnt-schedule.ll
deleted file mode 100644
index a039ba01a23..00000000000
--- a/llvm/test/CodeGen/X86/popcnt-schedule.ll
+++ /dev/null
@@ -1,235 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+popcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define i16 @test_ctpop_i16(i16 zeroext %a0, i16 *%a1) {
-; GENERIC-LABEL: test_ctpop_i16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: popcntw (%rsi), %cx # sched: [9:1.00]
-; GENERIC-NEXT: popcntw %di, %ax # sched: [3:1.00]
-; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_ctpop_i16:
-; SLM: # %bb.0:
-; SLM-NEXT: popcntw (%rsi), %cx # sched: [6:1.00]
-; SLM-NEXT: popcntw %di, %ax # sched: [3:1.00]
-; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT: # kill: def $ax killed $ax killed $eax
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_ctpop_i16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: popcntw (%rsi), %cx # sched: [9:1.00]
-; SANDY-NEXT: popcntw %di, %ax # sched: [3:1.00]
-; SANDY-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT: # kill: def $ax killed $ax killed $eax
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_ctpop_i16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: popcntw (%rsi), %cx # sched: [8:1.00]
-; HASWELL-NEXT: popcntw %di, %ax # sched: [3:1.00]
-; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ctpop_i16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: popcntw (%rsi), %cx # sched: [8:1.00]
-; BROADWELL-NEXT: popcntw %di, %ax # sched: [3:1.00]
-; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ctpop_i16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: popcntw (%rsi), %cx # sched: [8:1.00]
-; SKYLAKE-NEXT: popcntw %di, %ax # sched: [3:1.00]
-; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_ctpop_i16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: popcntw (%rsi), %cx # sched: [8:0.50]
-; BDVER2-NEXT: popcntw %di, %ax # sched: [4:0.50]
-; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_ctpop_i16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: popcntw (%rsi), %cx # sched: [4:1.00]
-; BTVER2-NEXT: popcntw %di, %ax # sched: [1:0.50]
-; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ctpop_i16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: popcntw (%rsi), %cx # sched: [5:0.50]
-; ZNVER1-NEXT: popcntw %di, %ax # sched: [1:0.25]
-; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: # kill: def $ax killed $ax killed $eax
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i16, i16 *%a1
- %2 = tail call i16 @llvm.ctpop.i16( i16 %1 )
- %3 = tail call i16 @llvm.ctpop.i16( i16 %a0 )
- %4 = or i16 %2, %3
- ret i16 %4
-}
-declare i16 @llvm.ctpop.i16(i16)
-
-define i32 @test_ctpop_i32(i32 %a0, i32 *%a1) {
-; GENERIC-LABEL: test_ctpop_i32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: popcntl (%rsi), %ecx # sched: [9:1.00]
-; GENERIC-NEXT: popcntl %edi, %eax # sched: [3:1.00]
-; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_ctpop_i32:
-; SLM: # %bb.0:
-; SLM-NEXT: popcntl (%rsi), %ecx # sched: [6:1.00]
-; SLM-NEXT: popcntl %edi, %eax # sched: [3:1.00]
-; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_ctpop_i32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: popcntl (%rsi), %ecx # sched: [9:1.00]
-; SANDY-NEXT: popcntl %edi, %eax # sched: [3:1.00]
-; SANDY-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_ctpop_i32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: popcntl (%rsi), %ecx # sched: [8:1.00]
-; HASWELL-NEXT: popcntl %edi, %eax # sched: [3:1.00]
-; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ctpop_i32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: popcntl (%rsi), %ecx # sched: [8:1.00]
-; BROADWELL-NEXT: popcntl %edi, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ctpop_i32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: popcntl (%rsi), %ecx # sched: [8:1.00]
-; SKYLAKE-NEXT: popcntl %edi, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_ctpop_i32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: popcntl (%rsi), %ecx # sched: [8:0.50]
-; BDVER2-NEXT: popcntl %edi, %eax # sched: [4:0.50]
-; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_ctpop_i32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: popcntl (%rsi), %ecx # sched: [4:1.00]
-; BTVER2-NEXT: popcntl %edi, %eax # sched: [1:0.50]
-; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ctpop_i32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: popcntl (%rsi), %ecx # sched: [5:0.50]
-; ZNVER1-NEXT: popcntl %edi, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i32, i32 *%a1
- %2 = tail call i32 @llvm.ctpop.i32( i32 %1 )
- %3 = tail call i32 @llvm.ctpop.i32( i32 %a0 )
- %4 = or i32 %2, %3
- ret i32 %4
-}
-declare i32 @llvm.ctpop.i32(i32)
-
-define i64 @test_ctpop_i64(i64 %a0, i64 *%a1) {
-; GENERIC-LABEL: test_ctpop_i64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: popcntq (%rsi), %rcx # sched: [9:1.00]
-; GENERIC-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
-; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_ctpop_i64:
-; SLM: # %bb.0:
-; SLM-NEXT: popcntq (%rsi), %rcx # sched: [6:1.00]
-; SLM-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
-; SLM-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_ctpop_i64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: popcntq (%rsi), %rcx # sched: [9:1.00]
-; SANDY-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
-; SANDY-NEXT: orq %rcx, %rax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_ctpop_i64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: popcntq (%rsi), %rcx # sched: [8:1.00]
-; HASWELL-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
-; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ctpop_i64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: popcntq (%rsi), %rcx # sched: [8:1.00]
-; BROADWELL-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
-; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ctpop_i64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: popcntq (%rsi), %rcx # sched: [8:1.00]
-; SKYLAKE-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
-; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_ctpop_i64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: popcntq (%rsi), %rcx # sched: [8:0.50]
-; BDVER2-NEXT: popcntq %rdi, %rax # sched: [4:0.50]
-; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_ctpop_i64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: popcntq (%rsi), %rcx # sched: [4:1.00]
-; BTVER2-NEXT: popcntq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ctpop_i64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: popcntq (%rsi), %rcx # sched: [5:0.50]
-; ZNVER1-NEXT: popcntq %rdi, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64 *%a1
- %2 = tail call i64 @llvm.ctpop.i64( i64 %1 )
- %3 = tail call i64 @llvm.ctpop.i64( i64 %a0 )
- %4 = or i64 %2, %3
- ret i64 %4
-}
-declare i64 @llvm.ctpop.i64(i64)
diff --git a/llvm/test/CodeGen/X86/rdpid-schedule.ll b/llvm/test/CodeGen/X86/rdpid-schedule.ll
deleted file mode 100644
index f1a60dc54a6..00000000000
--- a/llvm/test/CodeGen/X86/rdpid-schedule.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+rdpid | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=icelake-client | FileCheck %s --check-prefix=CHECK --check-prefix=ICELAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=icelake-server | FileCheck %s --check-prefix=CHECK --check-prefix=ICELAKE
-
-define i32 @test_rdpid() {
-; GENERIC-LABEL: test_rdpid:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rdpid %rax # sched: [100:0.33]
-; GENERIC-NEXT: # kill: def $eax killed $eax killed $rax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ICELAKE-LABEL: test_rdpid:
-; ICELAKE: # %bb.0:
-; ICELAKE-NEXT: rdpid %rax # sched: [100:0.25]
-; ICELAKE-NEXT: # kill: def $eax killed $eax killed $rax
-; ICELAKE-NEXT: retq # sched: [7:1.00]
- %1 = tail call i32 @llvm.x86.rdpid()
- ret i32 %1
-}
-declare i32 @llvm.x86.rdpid()
diff --git a/llvm/test/CodeGen/X86/rdrand-schedule.ll b/llvm/test/CodeGen/X86/rdrand-schedule.ll
deleted file mode 100644
index fa38b24bd82..00000000000
--- a/llvm/test/CodeGen/X86/rdrand-schedule.ll
+++ /dev/null
@@ -1,148 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+rdrnd | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=GOLDMONT
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=IVY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-declare {i16, i32} @llvm.x86.rdrand.16()
-declare {i32, i32} @llvm.x86.rdrand.32()
-declare {i64, i32} @llvm.x86.rdrand.64()
-
-define i16 @test_rdrand_16(i16* %random_val) {
-; GENERIC-LABEL: test_rdrand_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rdrandw %ax # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GOLDMONT-LABEL: test_rdrand_16:
-; GOLDMONT: # %bb.0:
-; GOLDMONT-NEXT: rdrandw %ax # sched: [100:1.00]
-; GOLDMONT-NEXT: retq # sched: [4:1.00]
-;
-; IVY-LABEL: test_rdrand_16:
-; IVY: # %bb.0:
-; IVY-NEXT: rdrandw %ax # sched: [100:0.33]
-; IVY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rdrand_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: rdrandw %ax # sched: [1:5.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rdrand_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: rdrandw %ax # sched: [9:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rdrand_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: rdrandw %ax # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rdrand_16:
-; SKX: # %bb.0:
-; SKX-NEXT: rdrandw %ax # sched: [100:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_rdrand_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: rdrandw %ax # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %call = call {i16, i32} @llvm.x86.rdrand.16()
- %randval = extractvalue {i16, i32} %call, 0
- ret i16 %randval
-}
-
-define i32 @test_rdrand_32(i32* %random_val) {
-; GENERIC-LABEL: test_rdrand_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rdrandl %eax # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GOLDMONT-LABEL: test_rdrand_32:
-; GOLDMONT: # %bb.0:
-; GOLDMONT-NEXT: rdrandl %eax # sched: [100:1.00]
-; GOLDMONT-NEXT: retq # sched: [4:1.00]
-;
-; IVY-LABEL: test_rdrand_32:
-; IVY: # %bb.0:
-; IVY-NEXT: rdrandl %eax # sched: [100:0.33]
-; IVY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rdrand_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: rdrandl %eax # sched: [1:5.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rdrand_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: rdrandl %eax # sched: [9:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rdrand_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: rdrandl %eax # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rdrand_32:
-; SKX: # %bb.0:
-; SKX-NEXT: rdrandl %eax # sched: [100:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_rdrand_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: rdrandl %eax # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %call = call {i32, i32} @llvm.x86.rdrand.32()
- %randval = extractvalue {i32, i32} %call, 0
- ret i32 %randval
-}
-
-define i64 @test_rdrand_64(i64* %random_val) {
-; GENERIC-LABEL: test_rdrand_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rdrandq %rax # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GOLDMONT-LABEL: test_rdrand_64:
-; GOLDMONT: # %bb.0:
-; GOLDMONT-NEXT: rdrandq %rax # sched: [100:1.00]
-; GOLDMONT-NEXT: retq # sched: [4:1.00]
-;
-; IVY-LABEL: test_rdrand_64:
-; IVY: # %bb.0:
-; IVY-NEXT: rdrandq %rax # sched: [100:0.33]
-; IVY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rdrand_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: rdrandq %rax # sched: [1:5.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rdrand_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: rdrandq %rax # sched: [9:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rdrand_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: rdrandq %rax # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rdrand_64:
-; SKX: # %bb.0:
-; SKX-NEXT: rdrandq %rax # sched: [100:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_rdrand_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: rdrandq %rax # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %call = call {i64, i32} @llvm.x86.rdrand.64()
- %randval = extractvalue {i64, i32} %call, 0
- ret i64 %randval
-}
diff --git a/llvm/test/CodeGen/X86/rdseed-schedule.ll b/llvm/test/CodeGen/X86/rdseed-schedule.ll
deleted file mode 100644
index b3feeabe797..00000000000
--- a/llvm/test/CodeGen/X86/rdseed-schedule.ll
+++ /dev/null
@@ -1,116 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+rdseed | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=GOLDMONT
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-declare {i16, i32} @llvm.x86.rdseed.16()
-declare {i32, i32} @llvm.x86.rdseed.32()
-declare {i64, i32} @llvm.x86.rdseed.64()
-
-define i16 @test_rdseed_16(i16* %random_val) {
-; GENERIC-LABEL: test_rdseed_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rdseedw %ax # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GOLDMONT-LABEL: test_rdseed_16:
-; GOLDMONT: # %bb.0:
-; GOLDMONT-NEXT: rdseedw %ax # sched: [100:1.00]
-; GOLDMONT-NEXT: retq # sched: [4:1.00]
-;
-; BROADWELL-LABEL: test_rdseed_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: rdseedw %ax # sched: [100:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rdseed_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: rdseedw %ax # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rdseed_16:
-; SKX: # %bb.0:
-; SKX-NEXT: rdseedw %ax # sched: [100:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_rdseed_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: rdseedw %ax # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %call = call {i16, i32} @llvm.x86.rdseed.16()
- %randval = extractvalue {i16, i32} %call, 0
- ret i16 %randval
-}
-
-define i32 @test_rdseed_32(i16* %random_val) {
-; GENERIC-LABEL: test_rdseed_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rdseedl %eax # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GOLDMONT-LABEL: test_rdseed_32:
-; GOLDMONT: # %bb.0:
-; GOLDMONT-NEXT: rdseedl %eax # sched: [100:1.00]
-; GOLDMONT-NEXT: retq # sched: [4:1.00]
-;
-; BROADWELL-LABEL: test_rdseed_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: rdseedl %eax # sched: [100:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rdseed_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: rdseedl %eax # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rdseed_32:
-; SKX: # %bb.0:
-; SKX-NEXT: rdseedl %eax # sched: [100:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_rdseed_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: rdseedl %eax # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %call = call {i32, i32} @llvm.x86.rdseed.32()
- %randval = extractvalue {i32, i32} %call, 0
- ret i32 %randval
-}
-
-define i64 @test_rdseed_64(i64* %random_val) {
-; GENERIC-LABEL: test_rdseed_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rdseedq %rax # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GOLDMONT-LABEL: test_rdseed_64:
-; GOLDMONT: # %bb.0:
-; GOLDMONT-NEXT: rdseedq %rax # sched: [100:1.00]
-; GOLDMONT-NEXT: retq # sched: [4:1.00]
-;
-; BROADWELL-LABEL: test_rdseed_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: rdseedq %rax # sched: [100:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rdseed_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: rdseedq %rax # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rdseed_64:
-; SKX: # %bb.0:
-; SKX-NEXT: rdseedq %rax # sched: [100:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_rdseed_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: rdseedq %rax # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %call = call {i64, i32} @llvm.x86.rdseed.64()
- %randval = extractvalue {i64, i32} %call, 0
- ret i64 %randval
-}
diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll
index a68940eb11a..c8908b54c50 100644
--- a/llvm/test/CodeGen/X86/recip-fastmath.ll
+++ b/llvm/test/CodeGen/X86/recip-fastmath.ll
@@ -2,13 +2,13 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL-NO-FMA
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX
; If the target's divss/divps instructions are substantially
; slower than rcpss/rcpps with a Newton-Raphson refinement,
@@ -26,59 +26,11 @@ define float @f32_no_estimate(float %x) #0 {
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX-RECIP-LABEL: f32_no_estimate:
-; AVX-RECIP: # %bb.0:
-; AVX-RECIP-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-RECIP-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; AVX-RECIP-NEXT: retq
-;
-; FMA-RECIP-LABEL: f32_no_estimate:
-; FMA-RECIP: # %bb.0:
-; FMA-RECIP-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; FMA-RECIP-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; FMA-RECIP-NEXT: retq
-;
-; BDVER2-LABEL: f32_no_estimate:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [9:9.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: f32_no_estimate:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [19:19.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: f32_no_estimate:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [14:14.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: f32_no_estimate:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [13:7.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-NO-FMA-LABEL: f32_no_estimate:
-; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; HASWELL-NO-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
-; HASWELL-NO-FMA-NEXT: retq
-;
-; KNL-LABEL: f32_no_estimate:
-; KNL: # %bb.0:
-; KNL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; KNL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [13:7.00]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: f32_no_estimate:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [11:3.00]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX-LABEL: f32_no_estimate:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
%div = fdiv fast float 1.0, %x
ret float %div
}
@@ -114,37 +66,37 @@ define float @f32_one_step(float %x) #1 {
;
; BDVER2-LABEL: f32_one_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: f32_one_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: f32_one_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: f32_one_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: f32_one_step:
; HASWELL-NO-FMA: # %bb.0:
@@ -156,19 +108,12 @@ define float @f32_one_step(float %x) #1 {
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: f32_one_step:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: f32_one_step:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: f32_one_step:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; AVX512-NEXT: retq
%div = fdiv fast float 1.0, %x
ret float %div
}
@@ -218,52 +163,52 @@ define float @f32_two_step(float %x) #2 {
;
; BDVER2-LABEL: f32_two_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1
+; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: f32_two_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
-; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [2:1.00]
-; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2
+; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2
+; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2
+; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: f32_two_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2
+; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2
+; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2
+; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: f32_two_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; HASWELL-NEXT: vmovaps %xmm1, %xmm3
+; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
+; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
+; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
+; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: f32_two_step:
; HASWELL-NO-FMA: # %bb.0:
@@ -279,27 +224,16 @@ define float @f32_two_step(float %x) #2 {
; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: f32_two_step:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: f32_two_step:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: f32_two_step:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX512-NEXT: vmovaps %xmm1, %xmm3
+; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
+; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
+; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
+; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
+; AVX512-NEXT: retq
%div = fdiv fast float 1.0, %x
ret float %div
}
@@ -326,27 +260,27 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
;
; BDVER2-LABEL: v4f32_no_estimate:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [9:9.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_no_estimate:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [19:19.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_no_estimate:
; SANDY: # %bb.0:
-; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:14.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_no_estimate:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:7.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_no_estimate:
; HASWELL-NO-FMA: # %bb.0:
@@ -354,17 +288,11 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
; HASWELL-NO-FMA-NEXT: vdivps %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: v4f32_no_estimate:
-; KNL: # %bb.0:
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; KNL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:7.00]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v4f32_no_estimate:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SKX-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [11:3.00]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: v4f32_no_estimate:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: retq
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
}
@@ -400,38 +328,38 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
;
; BDVER2-LABEL: v4f32_one_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_one_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %xmm0, %xmm1
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_one_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %xmm0, %xmm1
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_one_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %xmm0, %xmm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_one_step:
; HASWELL-NO-FMA: # %bb.0:
@@ -445,18 +373,18 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
;
; KNL-LABEL: v4f32_one_step:
; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
+; KNL-NEXT: vrcpps %xmm0, %xmm1
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; KNL-NEXT: retq
;
; SKX-LABEL: v4f32_one_step:
; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; SKX-NEXT: vrcpps %xmm0, %xmm1
+; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; SKX-NEXT: retq
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
}
@@ -506,52 +434,52 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
;
; BDVER2-LABEL: v4f32_two_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1
+; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1
+; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_two_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %xmm0, %xmm1
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2
+; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2
+; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2
+; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_two_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %xmm0, %xmm1
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2
+; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2
+; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2
+; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_two_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %xmm0, %xmm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vmovaps %xmm1, %xmm3
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_two_step:
; HASWELL-NO-FMA: # %bb.0:
@@ -567,27 +495,16 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: v4f32_two_step:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v4f32_two_step:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: v4f32_two_step:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpps %xmm0, %xmm1
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vmovaps %xmm1, %xmm3
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
+; AVX512-NEXT: retq
%div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <4 x float> %div
}
@@ -617,27 +534,27 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
;
; BDVER2-LABEL: v8f32_no_estimate:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [9:19.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v8f32_no_estimate:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [38:38.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v8f32_no_estimate:
; SANDY: # %bb.0:
-; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:28.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v8f32_no_estimate:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:14.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v8f32_no_estimate:
; HASWELL-NO-FMA: # %bb.0:
@@ -645,17 +562,11 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
; HASWELL-NO-FMA-NEXT: vdivps %ymm0, %ymm1, %ymm0
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: v8f32_no_estimate:
-; KNL: # %bb.0:
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; KNL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:14.00]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v8f32_no_estimate:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SKX-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [11:5.00]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: v8f32_no_estimate:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0
+; AVX512-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
@@ -698,38 +609,38 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
;
; BDVER2-LABEL: v8f32_one_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v8f32_one_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v8f32_one_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v8f32_one_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v8f32_one_step:
; HASWELL-NO-FMA: # %bb.0:
@@ -743,18 +654,18 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
;
; KNL-LABEL: v8f32_one_step:
; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
+; KNL-NEXT: vrcpps %ymm0, %ymm1
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
+; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; KNL-NEXT: retq
;
; SKX-LABEL: v8f32_one_step:
; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; SKX-NEXT: vrcpps %ymm0, %ymm1
+; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem
+; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; SKX-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
@@ -817,52 +728,52 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
;
; BDVER2-LABEL: v8f32_two_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1
+; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v8f32_two_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2
+; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2
+; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v8f32_two_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2
+; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v8f32_two_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vmovaps %ymm1, %ymm3
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v8f32_two_step:
; HASWELL-NO-FMA: # %bb.0:
@@ -878,27 +789,16 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: v8f32_two_step:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v8f32_two_step:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: v8f32_two_step:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpps %ymm0, %ymm1
+; AVX512-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vmovaps %ymm1, %ymm3
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3
+; AVX512-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
@@ -936,31 +836,31 @@ define <16 x float> @v16f32_no_estimate(<16 x float> %x) #0 {
;
; BDVER2-LABEL: v16f32_no_estimate:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [9:19.00]
-; BDVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [9:19.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; BDVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_no_estimate:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [38:38.00]
-; BTVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [38:38.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_no_estimate:
; SANDY: # %bb.0:
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [29:28.00]
-; SANDY-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [29:28.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_no_estimate:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [21:14.00]
-; HASWELL-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [21:14.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vdivps %ymm0, %ymm2, %ymm0
+; HASWELL-NEXT: vdivps %ymm1, %ymm2, %ymm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_no_estimate:
; HASWELL-NO-FMA: # %bb.0:
@@ -969,17 +869,11 @@ define <16 x float> @v16f32_no_estimate(<16 x float> %x) #0 {
; HASWELL-NO-FMA-NEXT: vdivps %ymm1, %ymm2, %ymm1
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: v16f32_no_estimate:
-; KNL: # %bb.0:
-; KNL-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [10:1.00]
-; KNL-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [21:14.00]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v16f32_no_estimate:
-; SKX: # %bb.0:
-; SKX-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [8:0.50]
-; SKX-NEXT: vdivps %zmm0, %zmm1, %zmm0 # sched: [18:10.00]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: v16f32_no_estimate:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vbroadcastss {{.*#+}} zmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vdivps %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div
}
@@ -1045,55 +939,55 @@ define <16 x float> @v16f32_one_step(<16 x float> %x) #1 {
;
; BDVER2-LABEL: v16f32_one_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vrcpps %ymm1, %ymm4 # sched: [5:2.00]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm1, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm4, %ymm1, %ymm4, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm2
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vrcpps %ymm1, %ymm4
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm1, %ymm1
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm4, %ymm1, %ymm4, %ymm1
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_one_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vrcpps %ymm1, %ymm4 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm4, %ymm1, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm4, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm4, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm2
+; BTVER2-NEXT: vrcpps %ymm1, %ymm4
+; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; BTVER2-NEXT: vmulps %ymm4, %ymm1, %ymm1
+; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1
+; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vmulps %ymm1, %ymm4, %ymm1
+; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vaddps %ymm1, %ymm4, %ymm1
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_one_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vrcpps %ymm1, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_one_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vrcpps %ymm1, %ymm4 # sched: [11:2.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4 sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm2
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vrcpps %ymm1, %ymm4
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_one_step:
; HASWELL-NO-FMA: # %bb.0:
@@ -1110,19 +1004,12 @@ define <16 x float> @v16f32_one_step(<16 x float> %x) #1 {
; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: v16f32_one_step:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v16f32_one_step:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: v16f32_one_step:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcp14ps %zmm0, %zmm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem
+; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1
+; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div
}
@@ -1226,81 +1113,81 @@ define <16 x float> @v16f32_two_step(<16 x float> %x) #2 {
;
; BDVER2-LABEL: v16f32_two_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm2
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0
+; BDVER2-NEXT: vrcpps %ymm1, %ymm2
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_two_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm4, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm1, %ymm4, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm3
+; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm0, %ymm4, %ymm0
+; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vrcpps %ymm1, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm3
+; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; BTVER2-NEXT: vsubps %ymm1, %ymm4, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_two_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm0, %ymm4, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm1, %ymm4, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm3
+; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; SANDY-NEXT: vsubps %ymm0, %ymm4, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vrcpps %ymm1, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3
+; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; SANDY-NEXT: vsubps %ymm1, %ymm4, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_two_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vmovaps %ymm2, %ymm4 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4 sched: [5:0.50]
-; HASWELL-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vmovaps %ymm2, %ymm4 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4 sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm2
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vmovaps %ymm2, %ymm4
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4
+; HASWELL-NEXT: vrcpps %ymm1, %ymm2
+; HASWELL-NEXT: vmovaps %ymm2, %ymm4
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_two_step:
; HASWELL-NO-FMA: # %bb.0:
@@ -1325,27 +1212,16 @@ define <16 x float> @v16f32_two_step(<16 x float> %x) #2 {
; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1
; HASWELL-NO-FMA-NEXT: retq
;
-; KNL-LABEL: v16f32_two_step:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [10:1.00]
-; KNL-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:1.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v16f32_two_step:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [8:0.50]
-; SKX-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX512-LABEL: v16f32_two_step:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcp14ps %zmm0, %zmm1
+; AVX512-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vmovaps %zmm1, %zmm3
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3
+; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div
}
diff --git a/llvm/test/CodeGen/X86/recip-fastmath2.ll b/llvm/test/CodeGen/X86/recip-fastmath2.ll
index dbe2689077e..de511b411b3 100644
--- a/llvm/test/CodeGen/X86/recip-fastmath2.ll
+++ b/llvm/test/CodeGen/X86/recip-fastmath2.ll
@@ -1,14 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL-NO-FMA
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=-fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL-NO-FMA
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX
; It's the extra tests coverage for recip as discussed on D26855.
@@ -19,59 +19,11 @@ define float @f32_no_step_2(float %x) #3 {
; SSE-NEXT: mulss {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
-; AVX-RECIP-LABEL: f32_no_step_2:
-; AVX-RECIP: # %bb.0:
-; AVX-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm0
-; AVX-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; AVX-RECIP-NEXT: retq
-;
-; FMA-RECIP-LABEL: f32_no_step_2:
-; FMA-RECIP: # %bb.0:
-; FMA-RECIP-NEXT: vrcpss %xmm0, %xmm0, %xmm0
-; FMA-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
-; FMA-RECIP-NEXT: retq
-;
-; BDVER2-LABEL: f32_no_step_2:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: f32_no_step_2:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: f32_no_step_2:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: f32_no_step_2:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-NO-FMA-LABEL: f32_no_step_2:
-; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: f32_no_step_2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: f32_no_step_2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX-LABEL: f32_no_step_2:
+; AVX: # %bb.0:
+; AVX-NEXT: vrcpss %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: retq
%div = fdiv fast float 1234.0, %x
ret float %div
}
@@ -110,68 +62,60 @@ define float @f32_one_step_2(float %x) #1 {
;
; BDVER2-LABEL: f32_one_step_2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: f32_one_step_2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: f32_one_step_2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: f32_one_step_2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: f32_one_step_2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: f32_one_step_2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: f32_one_step_2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: f32_one_step_2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
%div = fdiv fast float 3456.0, %x
ret float %div
}
@@ -213,75 +157,66 @@ define float @f32_one_step_2_divs(float %x) #1 {
;
; BDVER2-LABEL: f32_one_step_2_divs:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:1.00]
-; BDVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
+; BDVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: f32_one_step_2_divs:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [7:1.00]
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: f32_one_step_2_divs:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: f32_one_step_2_divs:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50]
-; HASWELL-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
+; HASWELL-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: f32_one_step_2_divs:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: f32_one_step_2_divs:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50]
-; KNL-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: f32_one_step_2_divs:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50]
-; SKX-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: f32_one_step_2_divs:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1
+; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: retq
%div = fdiv fast float 3456.0, %x
%div2 = fdiv fast float %div, %x
ret float %div2
@@ -335,95 +270,83 @@ define float @f32_two_step_2(float %x) #2 {
;
; BDVER2-LABEL: f32_two_step_2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1
+; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: f32_two_step_2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
-; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [2:1.00]
-; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm2
+; BTVER2-NEXT: vsubss %xmm2, %xmm3, %xmm2
+; BTVER2-NEXT: vmulss %xmm2, %xmm1, %xmm2
+; BTVER2-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubss %xmm0, %xmm3, %xmm0
+; BTVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: f32_two_step_2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2
+; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2
+; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2
+; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0
+; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: f32_two_step_2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50]
-; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; HASWELL-NEXT: vmovaps %xmm1, %xmm3
+; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
+; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
+; HASWELL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
+; HASWELL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
+; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: f32_two_step_2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: f32_two_step_2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50]
-; KNL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: f32_two_step_2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50]
-; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm2
+; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; HASWELL-NO-FMA-NEXT: vsubss %xmm2, %xmm3, %xmm2
+; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2
+; HASWELL-NO-FMA-NEXT: vaddss %xmm2, %xmm1, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm3, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: f32_two_step_2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpss %xmm0, %xmm0, %xmm1
+; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX512-NEXT: vmovaps %xmm1, %xmm3
+; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
+; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
+; AVX512-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
+; AVX512-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
+; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
%div = fdiv fast float 6789.0, %x
ret float %div
}
@@ -462,70 +385,70 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
;
; BDVER2-LABEL: v4f32_one_step2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_one_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %xmm0, %xmm1
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_one_step2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %xmm0, %xmm1
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_one_step2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %xmm0, %xmm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_one_step2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: retq
;
; KNL-LABEL: v4f32_one_step2:
; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
+; KNL-NEXT: vrcpps %xmm0, %xmm1
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; KNL-NEXT: retq
;
; SKX-LABEL: v4f32_one_step2:
; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; SKX-NEXT: vrcpps %xmm0, %xmm1
+; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; SKX-NEXT: retq
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
ret <4 x float> %div
}
@@ -567,77 +490,77 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
;
; BDVER2-LABEL: v4f32_one_step_2_divs:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:1.00]
-; BDVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
+; BDVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_one_step_2_divs:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [7:1.00]
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %xmm0, %xmm1
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_one_step_2_divs:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %xmm0, %xmm1
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_one_step_2_divs:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:0.50]
-; HASWELL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %xmm0, %xmm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
+; HASWELL-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_one_step_2_divs:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:0.50]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: retq
;
; KNL-LABEL: v4f32_one_step_2_divs:
; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:0.50]
-; KNL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
+; KNL-NEXT: vrcpps %xmm0, %xmm1
+; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
+; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
+; KNL-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; KNL-NEXT: retq
;
; SKX-LABEL: v4f32_one_step_2_divs:
; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50]
-; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; SKX-NEXT: vrcpps %xmm0, %xmm1
+; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem
+; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
+; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1
+; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; SKX-NEXT: retq
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
%div2 = fdiv fast <4 x float> %div, %x
ret <4 x float> %div2
@@ -691,95 +614,83 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
;
; BDVER2-LABEL: v4f32_two_step2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %xmm0, %xmm1
+; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1
+; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0
+; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v4f32_two_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %xmm0, %xmm1
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm2
+; BTVER2-NEXT: vsubps %xmm2, %xmm3, %xmm2
+; BTVER2-NEXT: vmulps %xmm2, %xmm1, %xmm2
+; BTVER2-NEXT: vaddps %xmm2, %xmm1, %xmm1
+; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; BTVER2-NEXT: vsubps %xmm0, %xmm3, %xmm0
+; BTVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v4f32_two_step2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %xmm0, %xmm1
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2
+; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2
+; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2
+; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1
+; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0
+; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v4f32_two_step2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %xmm0, %xmm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vmovaps %xmm1, %xmm3
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v4f32_two_step2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v4f32_two_step2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v4f32_two_step2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [6:0.50]
-; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm2
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2
+; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2
+; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1
+; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm3, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: v4f32_two_step2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpps %xmm0, %xmm1
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vmovaps %xmm1, %xmm3
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
+; AVX512-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: retq
%div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x
ret <4 x float> %div
}
@@ -826,70 +737,70 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
;
; BDVER2-LABEL: v8f32_one_step2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v8f32_one_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v8f32_one_step2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v8f32_one_step2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v8f32_one_step2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: retq
;
; KNL-LABEL: v8f32_one_step2:
; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
+; KNL-NEXT: vrcpps %ymm0, %ymm1
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
+; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; KNL-NEXT: retq
;
; SKX-LABEL: v8f32_one_step2:
; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; SKX-NEXT: vrcpps %ymm0, %ymm1
+; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem
+; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; SKX-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
ret <8 x float> %div
}
@@ -940,77 +851,77 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
;
; BDVER2-LABEL: v8f32_one_step_2_divs:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:1.00]
-; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [10:2.00]
-; BDVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1
+; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
+; BDVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v8f32_one_step_2_divs:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [7:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v8f32_one_step_2_divs:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v8f32_one_step_2_divs:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:0.50]
-; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
+; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v8f32_one_step_2_divs:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; HASWELL-NO-FMA-NEXT: retq
;
; KNL-LABEL: v8f32_one_step_2_divs:
; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:0.50]
-; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
+; KNL-NEXT: vrcpps %ymm0, %ymm1
+; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
+; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
+; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; KNL-NEXT: retq
;
; SKX-LABEL: v8f32_one_step_2_divs:
; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [11:0.50]
-; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; SKX-NEXT: vrcpps %ymm0, %ymm1
+; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem
+; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
+; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1
+; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; SKX-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
%div2 = fdiv fast <8 x float> %div, %x
ret <8 x float> %div2
@@ -1078,95 +989,83 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
;
; BDVER2-LABEL: v8f32_two_step2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm1
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1
+; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v8f32_two_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm2
+; BTVER2-NEXT: vsubps %ymm2, %ymm3, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm2
+; BTVER2-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; BTVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v8f32_two_step2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2
+; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v8f32_two_step2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm1
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vmovaps %ymm1, %ymm3
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v8f32_two_step2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v8f32_two_step2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v8f32_two_step2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2
+; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0
+; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: v8f32_two_step2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcpps %ymm0, %ymm1
+; AVX512-NEXT: vbroadcastss {{.*#+}} ymm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vmovaps %ymm1, %ymm3
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3
+; AVX512-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
ret <8 x float> %div
}
@@ -1178,50 +1077,10 @@ define <8 x float> @v8f32_no_step(<8 x float> %x) #3 {
; SSE-NEXT: rcpps %xmm1, %xmm1
; SSE-NEXT: retq
;
-; AVX-RECIP-LABEL: v8f32_no_step:
-; AVX-RECIP: # %bb.0:
-; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm0
-; AVX-RECIP-NEXT: retq
-;
-; FMA-RECIP-LABEL: v8f32_no_step:
-; FMA-RECIP: # %bb.0:
-; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm0
-; FMA-RECIP-NEXT: retq
-;
-; BDVER2-LABEL: v8f32_no_step:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: v8f32_no_step:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: v8f32_no_step:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: v8f32_no_step:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-NO-FMA-LABEL: v8f32_no_step:
-; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v8f32_no_step:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v8f32_no_step:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX-LABEL: v8f32_no_step:
+; AVX: # %bb.0:
+; AVX-NEXT: vrcpps %ymm0, %ymm0
+; AVX-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <8 x float> %div
}
@@ -1235,59 +1094,11 @@ define <8 x float> @v8f32_no_step2(<8 x float> %x) #3 {
; SSE-NEXT: mulps {{.*}}(%rip), %xmm1
; SSE-NEXT: retq
;
-; AVX-RECIP-LABEL: v8f32_no_step2:
-; AVX-RECIP: # %bb.0:
-; AVX-RECIP-NEXT: vrcpps %ymm0, %ymm0
-; AVX-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; AVX-RECIP-NEXT: retq
-;
-; FMA-RECIP-LABEL: v8f32_no_step2:
-; FMA-RECIP: # %bb.0:
-; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm0
-; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
-; FMA-RECIP-NEXT: retq
-;
-; BDVER2-LABEL: v8f32_no_step2:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: v8f32_no_step2:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: v8f32_no_step2:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: v8f32_no_step2:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-NO-FMA-LABEL: v8f32_no_step2:
-; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v8f32_no_step2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v8f32_no_step2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00]
-; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; AVX-LABEL: v8f32_no_step2:
+; AVX: # %bb.0:
+; AVX-NEXT: vrcpps %ymm0, %ymm0
+; AVX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
%div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x
ret <8 x float> %div
}
@@ -1361,96 +1172,88 @@ define <16 x float> @v16f32_one_step2(<16 x float> %x) #1 {
;
; BDVER2-LABEL: v16f32_one_step2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vrcpps %ymm0, %ymm4 # sched: [5:2.00]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm4, %ymm0, %ymm4, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm1, %ymm2
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm4
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1
+; BDVER2-NEXT: vfmaddps %ymm4, %ymm0, %ymm4, %ymm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_one_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm4 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm4, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm4, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm4, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm1, %ymm2
+; BTVER2-NEXT: vrcpps %ymm0, %ymm4
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; BTVER2-NEXT: vmulps %ymm4, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1
+; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: vmulps %ymm0, %ymm4, %ymm0
+; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: vaddps %ymm0, %ymm4, %ymm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_one_step2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm1, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: vrcpps %ymm0, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_one_step2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vrcpps %ymm0, %ymm4 # sched: [11:2.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm1, %ymm2
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm4
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_one_step2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v16f32_one_step2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v16f32_one_step2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm3, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: v16f32_one_step2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcp14ps %zmm0, %zmm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem
+; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1
+; AVX512-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x
ret <16 x float> %div
}
@@ -1532,108 +1335,99 @@ define <16 x float> @v16f32_one_step_2_divs(<16 x float> %x) #1 {
;
; BDVER2-LABEL: v16f32_one_step_2_divs:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [10:2.00]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [10:2.00]
-; BDVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm2
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0
+; BDVER2-NEXT: vrcpps %ymm1, %ymm2
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2
+; BDVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0
+; BDVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_one_step_2_divs:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [7:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [7:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vrcpps %ymm1, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; BTVER2-NEXT: vsubps %ymm1, %ymm3, %ymm1
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3
+; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2
+; BTVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0
+; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_one_step_2_divs:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vrcpps %ymm1, %ymm4 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm4, %ymm1, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm4, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm4, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [12:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [12:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; SANDY-NEXT: vrcpps %ymm1, %ymm4
+; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vmulps %ymm4, %ymm1, %ymm1
+; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm4, %ymm1
+; SANDY-NEXT: vaddps %ymm1, %ymm4, %ymm1
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3
+; SANDY-NEXT: vmulps %ymm0, %ymm3, %ymm0
+; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_one_step_2_divs:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [12:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [12:0.50]
-; HASWELL-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:0.50]
-; HASWELL-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm2
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm2
+; HASWELL-NEXT: vrcpps %ymm1, %ymm2
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm2 * ymm1) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3
+; HASWELL-NEXT: vmulps %ymm0, %ymm3, %ymm0
+; HASWELL-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_one_step_2_divs:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm4 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm4, %ymm1, %ymm1 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm3, %ymm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm4, %ymm1 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm4, %ymm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v16f32_one_step_2_divs:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [12:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1 # sched: [12:0.50]
-; KNL-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [5:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v16f32_one_step_2_divs:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1 # sched: [11:0.50]
-; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm4
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps %ymm4, %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm3, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm4, %ymm1
+; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm4, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm3, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: v16f32_one_step_2_divs:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcp14ps %zmm0, %zmm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem
+; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1
+; AVX512-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1
+; AVX512-NEXT: vmulps %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x
%div2 = fdiv fast <16 x float> %div, %x
ret <16 x float> %div2
@@ -1745,138 +1539,126 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 {
;
; BDVER2-LABEL: v16f32_two_step2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
-; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [5:2.00]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [10:2.00]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
-; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm1, %ymm2
+; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1
+; BDVER2-NEXT: vrcpps %ymm0, %ymm2
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2
+; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0
+; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_two_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [5:1.00]
-; BTVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm1, %ymm4, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:2.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [7:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vsubps %ymm0, %ymm4, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; BTVER2-NEXT: vrcpps %ymm1, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm3
+; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; BTVER2-NEXT: vsubps %ymm1, %ymm4, %ymm1
+; BTVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; BTVER2-NEXT: vrcpps %ymm0, %ymm2
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm3
+; BTVER2-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; BTVER2-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; BTVER2-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; BTVER2-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; BTVER2-NEXT: vsubps %ymm0, %ymm4, %ymm0
+; BTVER2-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_two_step2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm1, %ymm4, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00]
-; SANDY-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vsubps %ymm0, %ymm4, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm1, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3
+; SANDY-NEXT: vmovaps {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; SANDY-NEXT: vsubps %ymm1, %ymm4, %ymm1
+; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; SANDY-NEXT: vrcpps %ymm0, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm3
+; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; SANDY-NEXT: vsubps %ymm0, %ymm4, %ymm0
+; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_two_step2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NEXT: vmovaps %ymm2, %ymm4 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4 sched: [5:0.50]
-; HASWELL-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NEXT: vmovaps %ymm2, %ymm4 # sched: [1:1.00]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2 sched: [5:0.50]
-; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3 sched: [5:0.50]
-; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4 sched: [5:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm1, %ymm2
+; HASWELL-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NEXT: vmovaps %ymm2, %ymm4
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm1 * ymm4) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm1 = -(ymm4 * ymm1) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4
+; HASWELL-NEXT: vrcpps %ymm0, %ymm2
+; HASWELL-NEXT: vmovaps %ymm2, %ymm4
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm4 = -(ymm0 * ymm4) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm4 = (ymm4 * ymm2) + ymm2
+; HASWELL-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm4 * ymm0) + ymm3
+; HASWELL-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm4) + ymm4
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_two_step2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm2 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm3 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [7:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm4, %ymm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm3 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm3, %ymm4, %ymm3 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm3, %ymm2, %ymm3 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm3, %ymm2, %ymm2 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm4, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
-; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0 # sched: [3:1.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v16f32_two_step2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [11:2.00]
-; KNL-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [10:1.00]
-; KNL-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:1.00]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [5:0.50]
-; KNL-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [5:0.50]
-; KNL-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [5:0.50]
-; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v16f32_two_step2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00]
-; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] sched: [8:0.50]
-; SKX-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:0.33]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.50]
-; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.50]
-; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.50]
-; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm3
+; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*#+}} ymm4 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; HASWELL-NO-FMA-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; HASWELL-NO-FMA-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; HASWELL-NO-FMA-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: vsubps %ymm1, %ymm4, %ymm1
+; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm2, %ymm1
+; HASWELL-NO-FMA-NEXT: vaddps %ymm1, %ymm2, %ymm1
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm3
+; HASWELL-NO-FMA-NEXT: vsubps %ymm3, %ymm4, %ymm3
+; HASWELL-NO-FMA-NEXT: vmulps %ymm3, %ymm2, %ymm3
+; HASWELL-NO-FMA-NEXT: vaddps %ymm3, %ymm2, %ymm2
+; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm4, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm2, %ymm0
+; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: v16f32_two_step2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcp14ps %zmm0, %zmm1
+; AVX512-NEXT: vbroadcastss {{.*#+}} zmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
+; AVX512-NEXT: vmovaps %zmm1, %zmm3
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1
+; AVX512-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2
+; AVX512-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3
+; AVX512-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x
ret <16 x float> %div
}
@@ -1904,43 +1686,38 @@ define <16 x float> @v16f32_no_step(<16 x float> %x) #3 {
;
; BDVER2-LABEL: v16f32_no_step:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm0, %ymm0
+; BDVER2-NEXT: vrcpps %ymm1, %ymm1
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_no_step:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vrcpps %ymm0, %ymm0
+; BTVER2-NEXT: vrcpps %ymm1, %ymm1
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_no_step:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
-; SANDY-NEXT: vrcpps %ymm1, %ymm1 # sched: [7:2.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm0, %ymm0
+; SANDY-NEXT: vrcpps %ymm1, %ymm1
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_no_step:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NEXT: vrcpps %ymm1, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm0, %ymm0
+; HASWELL-NEXT: vrcpps %ymm1, %ymm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_no_step:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm1 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v16f32_no_step:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [11:2.00]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v16f32_no_step:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [4:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: v16f32_no_step:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcp14ps %zmm0, %zmm0
+; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
ret <16 x float> %div
}
@@ -1976,55 +1753,49 @@ define <16 x float> @v16f32_no_step2(<16 x float> %x) #3 {
;
; BDVER2-LABEL: v16f32_no_step2:
; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [5:2.00]
-; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:2.00]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [10:2.00]
-; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
+; BDVER2-NEXT: vrcpps %ymm1, %ymm1
+; BDVER2-NEXT: vrcpps %ymm0, %ymm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; BDVER2-NEXT: retq
;
; BTVER2-LABEL: v16f32_no_step2:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [2:2.00]
-; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [7:2.00]
-; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
+; BTVER2-NEXT: vrcpps %ymm1, %ymm1
+; BTVER2-NEXT: vrcpps %ymm0, %ymm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; BTVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; BTVER2-NEXT: retq
;
; SANDY-LABEL: v16f32_no_step2:
; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %ymm1, %ymm1 # sched: [7:2.00]
-; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
-; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
+; SANDY-NEXT: vrcpps %ymm1, %ymm1
+; SANDY-NEXT: vrcpps %ymm0, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_no_step2:
; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %ymm1, %ymm1 # sched: [11:2.00]
-; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
+; HASWELL-NEXT: vrcpps %ymm1, %ymm1
+; HASWELL-NEXT: vrcpps %ymm0, %ymm0
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; HASWELL-NEXT: retq
;
; HASWELL-NO-FMA-LABEL: v16f32_no_step2:
; HASWELL-NO-FMA: # %bb.0:
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm1 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:0.50]
-; HASWELL-NO-FMA-NEXT: retq # sched: [7:1.00]
-;
-; KNL-LABEL: v16f32_no_step2:
-; KNL: # %bb.0:
-; KNL-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [11:2.00]
-; KNL-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [12:0.50]
-; KNL-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: v16f32_no_step2:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [4:2.00]
-; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
+; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
+; HASWELL-NO-FMA-NEXT: retq
+;
+; AVX512-LABEL: v16f32_no_step2:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vrcp14ps %zmm0, %zmm0
+; AVX512-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: retq
%div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x
ret <16 x float> %div
}
diff --git a/llvm/test/CodeGen/X86/rtm-schedule.ll b/llvm/test/CodeGen/X86/rtm-schedule.ll
deleted file mode 100644
index 1a724f5799e..00000000000
--- a/llvm/test/CodeGen/X86/rtm-schedule.ll
+++ /dev/null
@@ -1,62 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=x86-64 -mattr=+rtm | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=SKL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=cannonlake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=CNL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=icelake-client | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=ICL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+rtm -mcpu=icelake-server | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE --check-prefix=ICL
-
-define i32 @test_xbegin() nounwind uwtable {
-; GENERIC-LABEL: test_xbegin:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: xbegin .LBB0_2 # sched: [100:0.33]
-; GENERIC-NEXT: # %bb.1:
-; GENERIC-NEXT: movl $-1, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-; GENERIC-NEXT: .LBB0_2:
-; GENERIC-NEXT: # XABORT DEF # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKYLAKE-LABEL: test_xbegin:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: xbegin .LBB0_2 # sched: [100:0.25]
-; SKYLAKE-NEXT: # %bb.1:
-; SKYLAKE-NEXT: movl $-1, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-; SKYLAKE-NEXT: .LBB0_2:
-; SKYLAKE-NEXT: # XABORT DEF # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
- %1 = tail call i32 @llvm.x86.xbegin() nounwind
- ret i32 %1
-}
-declare i32 @llvm.x86.xbegin() nounwind
-
-define void @test_xend() nounwind uwtable {
-; GENERIC-LABEL: test_xend:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: xend # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKYLAKE-LABEL: test_xend:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: xend # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
- tail call void @llvm.x86.xend() nounwind
- ret void
-}
-declare void @llvm.x86.xend() nounwind
-
-define void @test_xabort() nounwind uwtable {
-; GENERIC-LABEL: test_xabort:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: xabort $2 # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SKYLAKE-LABEL: test_xabort:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: xabort $2 # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
- tail call void @llvm.x86.xabort(i8 2)
- ret void
-}
-declare void @llvm.x86.xabort(i8) nounwind
diff --git a/llvm/test/CodeGen/X86/schedule-x86-64-shld.ll b/llvm/test/CodeGen/X86/schedule-x86-64-shld.ll
deleted file mode 100644
index 9aee7138cd9..00000000000
--- a/llvm/test/CodeGen/X86/schedule-x86-64-shld.ll
+++ /dev/null
@@ -1,471 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER12 --check-prefix=BDVER1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER12 --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-
-
-; uint64_t lshift10(uint64_t a, uint64_t b)
-; {
-; return (a << 10) | (b >> 54);
-; }
-
-define i64 @lshift10_optsize(i64 %a, i64 %b) nounwind readnone optsize {
-; GENERIC-LABEL: lshift10_optsize:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift10_optsize:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: shldq $10, %rsi, %rax # sched: [4:3.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift10_optsize:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: shldq $10, %rsi, %rax # sched: [3:3.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %shl = shl i64 %a, 10
- %shr = lshr i64 %b, 54
- %or = or i64 %shr, %shl
- ret i64 %or
-}
-
-define i64 @lshift10(i64 %a, i64 %b) nounwind readnone {
-; GENERIC-LABEL: lshift10:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift10:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: shlq $10, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: shrq $54, %rsi # sched: [1:0.50]
-; BDVER12-NEXT: leaq (%rsi,%rdi), %rax # sched: [1:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift10:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: shlq $10, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: shrq $54, %rsi # sched: [1:0.50]
-; BTVER2-NEXT: leaq (%rsi,%rdi), %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %shl = shl i64 %a, 10
- %shr = lshr i64 %b, 54
- %or = or i64 %shr, %shl
- ret i64 %or
-}
-
-; uint64_t rshift10(uint64_t a, uint64_t b)
-; {
-; return (a >> 62) | (b << 2);
-; }
-
-; Should be done via shld
-define i64 @rshift10_optsize(i64 %a, i64 %b) nounwind readnone optsize {
-; GENERIC-LABEL: rshift10_optsize:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: rshift10_optsize:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: shrdq $62, %rsi, %rax # sched: [4:3.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: rshift10_optsize:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: shrdq $62, %rsi, %rax # sched: [3:3.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %shl = lshr i64 %a, 62
- %shr = shl i64 %b, 2
- %or = or i64 %shr, %shl
- ret i64 %or
-}
-
-; Should be done via lea (x,y,4),z
-define i64 @rshift10(i64 %a, i64 %b) nounwind readnone {
-; GENERIC-LABEL: rshift10:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: rshift10:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: shrq $62, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: rshift10:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: shrq $62, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %shl = lshr i64 %a, 62
- %shr = shl i64 %b, 2
- %or = or i64 %shr, %shl
- ret i64 %or
-}
-
-;uint64_t lshift(uint64_t a, uint64_t b, uint64_t c)
-;{
-; return (a << c) | (b >> (64-c));
-;}
-
-define i64 @lshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize {
-; GENERIC-LABEL: lshift_cl_optsize:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx
-; GENERIC-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift_cl_optsize:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
-; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER12-NEXT: shldq %cl, %rsi, %rax # sched: [4:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift_cl_optsize:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
-; BTVER2-NEXT: shldq %cl, %rsi, %rax # sched: [4:4.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %shl = shl i64 %a, %c
- %sub = sub nsw i64 64, %c
- %shr = lshr i64 %b, %sub
- %or = or i64 %shr, %shl
- ret i64 %or
-}
-
-define i64 @lshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
-; GENERIC-LABEL: lshift_cl:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx
-; GENERIC-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift_cl:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
-; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: shlq %cl, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: negb %cl # sched: [1:0.50]
-; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER12-NEXT: shrq %cl, %rax # sched: [1:0.50]
-; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift_cl:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
-; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: negb %cl # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
-; BTVER2-NEXT: shrq %cl, %rax # sched: [1:0.50]
-; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %shl = shl i64 %a, %c
- %sub = sub nsw i64 64, %c
- %shr = lshr i64 %b, %sub
- %or = or i64 %shr, %shl
- ret i64 %or
-}
-
-
-;uint64_t rshift(uint64_t a, uint64_t b, int c)
-;{
-; return (a >> c) | (b << (64-c));
-;}
-
-define i64 @rshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize {
-; GENERIC-LABEL: rshift_cl_optsize:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx
-; GENERIC-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: rshift_cl_optsize:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
-; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER12-NEXT: shrdq %cl, %rsi, %rax # sched: [4:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: rshift_cl_optsize:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
-; BTVER2-NEXT: shrdq %cl, %rsi, %rax # sched: [4:4.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %shr = lshr i64 %a, %c
- %sub = sub nsw i64 64, %c
- %shl = shl i64 %b, %sub
- %or = or i64 %shr, %shl
- ret i64 %or
-}
-
-define i64 @rshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
-; GENERIC-LABEL: rshift_cl:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rdx, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx
-; GENERIC-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: rshift_cl:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
-; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: shrq %cl, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: negb %cl # sched: [1:0.50]
-; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER12-NEXT: shlq %cl, %rax # sched: [1:0.50]
-; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: rshift_cl:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
-; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: negb %cl # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
-; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50]
-; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %shr = lshr i64 %a, %c
- %sub = sub nsw i64 64, %c
- %shl = shl i64 %b, %sub
- %or = or i64 %shr, %shl
- ret i64 %or
-}
-
-; extern uint64_t x;
-;void lshift(uint64_t a, uint64_t b, uint_64_t c)
-;{
-; x = (x << c) | (a >> (64-c));
-;}
-@x = global i64 0, align 4
-
-define void @lshift_mem_cl_optsize(i64 %a, i64 %c) nounwind readnone optsize {
-; GENERIC-LABEL: lshift_mem_cl_optsize:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rsi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx
-; GENERIC-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift_mem_cl_optsize:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.50]
-; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER12-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [4:11.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift_mem_cl_optsize:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
-; BTVER2-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [9:11.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %b = load i64, i64* @x
- %shl = shl i64 %b, %c
- %sub = sub nsw i64 64, %c
- %shr = lshr i64 %a, %sub
- %or = or i64 %shl, %shr
- store i64 %or, i64* @x
- ret void
-}
-
-define void @lshift_mem_cl(i64 %a, i64 %c) nounwind readnone {
-; GENERIC-LABEL: lshift_mem_cl:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq %rsi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $cl killed $cl killed $rcx
-; GENERIC-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift_mem_cl:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
-; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.50]
-; BDVER12-NEXT: shlq %cl, %rax # sched: [1:0.50]
-; BDVER12-NEXT: negb %cl # sched: [1:0.50]
-; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
-; BDVER12-NEXT: shrq %cl, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift_mem_cl:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [3:1.00]
-; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50]
-; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50]
-; BTVER2-NEXT: negb %cl # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
-; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %b = load i64, i64* @x
- %shl = shl i64 %b, %c
- %sub = sub nsw i64 64, %c
- %shr = lshr i64 %a, %sub
- %or = or i64 %shl, %shr
- store i64 %or, i64* @x
- ret void
-}
-
-define void @lshift_mem(i64 %a) nounwind readnone {
-; GENERIC-LABEL: lshift_mem:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift_mem:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
-; BDVER12-NEXT: shrq $54, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: shlq $10, %rax # sched: [1:0.50]
-; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift_mem:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [3:1.00]
-; BTVER2-NEXT: shrq $54, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: shlq $10, %rax # sched: [1:0.50]
-; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %b = load i64, i64* @x
- %shl = shl i64 %b, 10
- %shr = lshr i64 %a, 54
- %or = or i64 %shr, %shl
- store i64 %or, i64* @x
- ret void
-}
-
-define void @lshift_mem_optsize(i64 %a) nounwind readnone optsize {
-; GENERIC-LABEL: lshift_mem_optsize:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift_mem_optsize:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [4:11.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift_mem_optsize:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [9:11.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %b = load i64, i64* @x
- %shl = shl i64 %b, 10
- %shr = lshr i64 %a, 54
- %or = or i64 %shr, %shl
- store i64 %or, i64* @x
- ret void
-}
-
-define void @lshift_mem_b(i64 %b) nounwind readnone {
-; GENERIC-LABEL: lshift_mem_b:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
-; GENERIC-NEXT: shrdq $54, %rdi, %rax # sched: [2:0.67]
-; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift_mem_b:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
-; BDVER12-NEXT: shlq $10, %rdi # sched: [1:0.50]
-; BDVER12-NEXT: shrq $54, %rax # sched: [1:0.50]
-; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.50]
-; BDVER12-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift_mem_b:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [3:1.00]
-; BTVER2-NEXT: shlq $10, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: shrq $54, %rax # sched: [1:0.50]
-; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %a = load i64, i64* @x
- %shl = shl i64 %b, 10
- %shr = lshr i64 %a, 54
- %or = or i64 %shr, %shl
- store i64 %or, i64* @x
- ret void
-}
-
-define void @lshift_mem_b_optsize(i64 %b) nounwind readnone optsize {
-; GENERIC-LABEL: lshift_mem_b_optsize:
-; GENERIC: # %bb.0: # %entry
-; GENERIC-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
-; GENERIC-NEXT: shrdq $54, %rdi, %rax # sched: [2:0.67]
-; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: lshift_mem_b_optsize:
-; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
-; BDVER12-NEXT: shrdq $54, %rdi, %rax # sched: [4:3.00]
-; BDVER12-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: lshift_mem_b_optsize:
-; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [3:1.00]
-; BTVER2-NEXT: shrdq $54, %rdi, %rax # sched: [3:3.00]
-; BTVER2-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-entry:
- %a = load i64, i64* @x
- %shl = shl i64 %b, 10
- %shr = lshr i64 %a, 54
- %or = or i64 %shr, %shl
- store i64 %or, i64* @x
- ret void
-}
-
diff --git a/llvm/test/CodeGen/X86/schedule-x86_32.ll b/llvm/test/CodeGen/X86/schedule-x86_32.ll
deleted file mode 100644
index d691a8b814f..00000000000
--- a/llvm/test/CodeGen/X86/schedule-x86_32.ll
+++ /dev/null
@@ -1,2601 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=i686 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define i8 @test_aaa(i8 %a0) optsize {
-; GENERIC-LABEL: test_aaa:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movb {{[0-9]+}}(%esp), %al
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: aaa
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_aaa:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: aaa # sched: [13:6.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_aaa:
-; SLM: # %bb.0:
-; SLM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: aaa # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_aaa:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: aaa # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_aaa:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: aaa # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_aaa:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: aaa # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_aaa:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: aaa # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_aaa:
-; SKX: # %bb.0:
-; SKX-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: aaa # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_aaa:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: aaa # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_aaa:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: aaa # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_aaa:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: aaa # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- %1 = tail call i8 asm "aaa", "=r,r"(i8 %a0) nounwind
- ret i8 %1
-}
-
-define void @test_aad(i16 %a0) optsize {
-; GENERIC-LABEL: test_aad:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: aad
-; GENERIC-NEXT: aad $16
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_aad:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: aad # sched: [7:3.50]
-; ATOM-NEXT: aad $16 # sched: [7:3.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_aad:
-; SLM: # %bb.0:
-; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: aad # sched: [100:1.00]
-; SLM-NEXT: aad $16 # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_aad:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: aad # sched: [100:0.33]
-; SANDY-NEXT: aad $16 # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_aad:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: aad # sched: [100:0.25]
-; HASWELL-NEXT: aad $16 # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_aad:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: aad # sched: [100:0.25]
-; BROADWELL-NEXT: aad $16 # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_aad:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: aad # sched: [100:0.25]
-; SKYLAKE-NEXT: aad $16 # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_aad:
-; SKX: # %bb.0:
-; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: aad # sched: [100:0.25]
-; SKX-NEXT: aad $16 # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_aad:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: aad # sched: [100:0.50]
-; BDVER2-NEXT: aad $16 # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_aad:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: aad # sched: [100:0.50]
-; BTVER2-NEXT: aad $16 # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_aad:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: aad # sched: [100:0.25]
-; ZNVER1-NEXT: aad $16 # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm "aad \0A\09 aad $1", "r,i"(i16 %a0, i16 16) nounwind
- ret void
-}
-
-define void @test_aam(i8 %a0) optsize {
-; GENERIC-LABEL: test_aam:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movb {{[0-9]+}}(%esp), %al
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: aam
-; GENERIC-NEXT: aam $16
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_aam:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: aam # sched: [21:10.50]
-; ATOM-NEXT: aam $16 # sched: [21:10.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_aam:
-; SLM: # %bb.0:
-; SLM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: aam # sched: [100:1.00]
-; SLM-NEXT: aam $16 # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_aam:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: aam # sched: [100:0.33]
-; SANDY-NEXT: aam $16 # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_aam:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: aam # sched: [100:0.25]
-; HASWELL-NEXT: aam $16 # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_aam:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: aam # sched: [100:0.25]
-; BROADWELL-NEXT: aam $16 # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_aam:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: aam # sched: [100:0.25]
-; SKYLAKE-NEXT: aam $16 # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_aam:
-; SKX: # %bb.0:
-; SKX-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: aam # sched: [100:0.25]
-; SKX-NEXT: aam $16 # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_aam:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: aam # sched: [100:0.50]
-; BDVER2-NEXT: aam $16 # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_aam:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: aam # sched: [100:0.50]
-; BTVER2-NEXT: aam $16 # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_aam:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: aam # sched: [100:0.25]
-; ZNVER1-NEXT: aam $16 # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm "aam \0A\09 aam $1", "r,i"(i8 %a0, i8 16) nounwind
- ret void
-}
-
-define i8 @test_aas(i8 %a0) optsize {
-; GENERIC-LABEL: test_aas:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movb {{[0-9]+}}(%esp), %al
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: aas
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_aas:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: aas # sched: [13:6.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_aas:
-; SLM: # %bb.0:
-; SLM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: aas # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_aas:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: aas # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_aas:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: aas # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_aas:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: aas # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_aas:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: aas # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_aas:
-; SKX: # %bb.0:
-; SKX-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: aas # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_aas:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: aas # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_aas:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: aas # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_aas:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: aas # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- %1 = tail call i8 asm "aas", "=r,r"(i8 %a0) nounwind
- ret i8 %1
-}
-
-define void @test_arpl(i16 %a0, i16 *%a1) optsize {
-; GENERIC-LABEL: test_arpl:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: arpl %ax, (%ecx)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_arpl:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: arpl %ax, (%ecx) # sched: [23:11.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_arpl:
-; SLM: # %bb.0:
-; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: arpl %ax, (%ecx) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_arpl:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: arpl %ax, (%ecx) # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_arpl:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: arpl %ax, (%ecx) # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_arpl:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: arpl %ax, (%ecx) # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_arpl:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: arpl %ax, (%ecx) # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_arpl:
-; SKX: # %bb.0:
-; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: arpl %ax, (%ecx) # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_arpl:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: arpl %ax, (%ecx) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_arpl:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: arpl %ax, (%ecx) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_arpl:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: arpl %ax, (%ecx) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- call void asm sideeffect "arpl $0, $1", "r,*m"(i16 %a0, i16 *%a1)
- ret void
-}
-
-define void @test_bound(i16 %a0, i16 *%a1, i32 %a2, i32 *%a3) optsize {
-; GENERIC-LABEL: test_bound:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pushl %esi
-; GENERIC-NEXT: .cfi_def_cfa_offset 8
-; GENERIC-NEXT: .cfi_offset %esi, -8
-; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %esi
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: bound %ax, (%esi)
-; GENERIC-NEXT: bound %ecx, (%edx)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: popl %esi
-; GENERIC-NEXT: .cfi_def_cfa_offset 4
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_bound:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pushl %esi # sched: [1:1.00]
-; ATOM-NEXT: .cfi_def_cfa_offset 8
-; ATOM-NEXT: .cfi_offset %esi, -8
-; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: bound %ax, (%esi) # sched: [11:5.50]
-; ATOM-NEXT: bound %ecx, (%edx) # sched: [11:5.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: popl %esi # sched: [1:1.00]
-; ATOM-NEXT: .cfi_def_cfa_offset 4
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_bound:
-; SLM: # %bb.0:
-; SLM-NEXT: pushl %esi # sched: [1:1.00]
-; SLM-NEXT: .cfi_def_cfa_offset 8
-; SLM-NEXT: .cfi_offset %esi, -8
-; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: bound %ax, (%esi) # sched: [100:1.00]
-; SLM-NEXT: bound %ecx, (%edx) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: popl %esi # sched: [3:1.00]
-; SLM-NEXT: .cfi_def_cfa_offset 4
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_bound:
-; SANDY: # %bb.0:
-; SANDY-NEXT: pushl %esi # sched: [5:1.00]
-; SANDY-NEXT: .cfi_def_cfa_offset 8
-; SANDY-NEXT: .cfi_offset %esi, -8
-; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: bound %ax, (%esi) # sched: [100:0.33]
-; SANDY-NEXT: bound %ecx, (%edx) # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: popl %esi # sched: [6:0.50]
-; SANDY-NEXT: .cfi_def_cfa_offset 4
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_bound:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: pushl %esi # sched: [2:1.00]
-; HASWELL-NEXT: .cfi_def_cfa_offset 8
-; HASWELL-NEXT: .cfi_offset %esi, -8
-; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: bound %ax, (%esi) # sched: [1:3.75]
-; HASWELL-NEXT: bound %ecx, (%edx) # sched: [1:3.75]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: popl %esi # sched: [6:0.50]
-; HASWELL-NEXT: .cfi_def_cfa_offset 4
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bound:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: pushl %esi # sched: [2:1.00]
-; BROADWELL-NEXT: .cfi_def_cfa_offset 8
-; BROADWELL-NEXT: .cfi_offset %esi, -8
-; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: bound %ax, (%esi) # sched: [100:0.25]
-; BROADWELL-NEXT: bound %ecx, (%edx) # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: popl %esi # sched: [6:0.50]
-; BROADWELL-NEXT: .cfi_def_cfa_offset 4
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_bound:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: pushl %esi # sched: [2:1.00]
-; SKYLAKE-NEXT: .cfi_def_cfa_offset 8
-; SKYLAKE-NEXT: .cfi_offset %esi, -8
-; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: bound %ax, (%esi) # sched: [100:0.25]
-; SKYLAKE-NEXT: bound %ecx, (%edx) # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: popl %esi # sched: [6:0.50]
-; SKYLAKE-NEXT: .cfi_def_cfa_offset 4
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_bound:
-; SKX: # %bb.0:
-; SKX-NEXT: pushl %esi # sched: [2:1.00]
-; SKX-NEXT: .cfi_def_cfa_offset 8
-; SKX-NEXT: .cfi_offset %esi, -8
-; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: bound %ax, (%esi) # sched: [100:0.25]
-; SKX-NEXT: bound %ecx, (%edx) # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: popl %esi # sched: [6:0.50]
-; SKX-NEXT: .cfi_def_cfa_offset 4
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_bound:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: pushl %esi # sched: [1:1.00]
-; BDVER2-NEXT: .cfi_def_cfa_offset 8
-; BDVER2-NEXT: .cfi_offset %esi, -8
-; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: bound %ax, (%esi) # sched: [100:0.50]
-; BDVER2-NEXT: bound %ecx, (%edx) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: popl %esi # sched: [5:0.50]
-; BDVER2-NEXT: .cfi_def_cfa_offset 4
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_bound:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: pushl %esi # sched: [1:1.00]
-; BTVER2-NEXT: .cfi_def_cfa_offset 8
-; BTVER2-NEXT: .cfi_offset %esi, -8
-; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: bound %ax, (%esi) # sched: [100:0.50]
-; BTVER2-NEXT: bound %ecx, (%edx) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: popl %esi # sched: [3:1.00]
-; BTVER2-NEXT: .cfi_def_cfa_offset 4
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bound:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: pushl %esi # sched: [1:0.50]
-; ZNVER1-NEXT: .cfi_def_cfa_offset 8
-; ZNVER1-NEXT: .cfi_offset %esi, -8
-; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: bound %ax, (%esi) # sched: [100:0.25]
-; ZNVER1-NEXT: bound %ecx, (%edx) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: popl %esi # sched: [8:0.50]
-; ZNVER1-NEXT: .cfi_def_cfa_offset 4
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- call void asm sideeffect "bound $0, $1 \0A\09 bound $2, $3", "r,*m,r,*m"(i16 %a0, i16 *%a1, i32 %a2, i32 *%a3)
- ret void
-}
-
-; TODO - test_call
-
-define i8 @test_daa(i8 %a0) optsize {
-; GENERIC-LABEL: test_daa:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movb {{[0-9]+}}(%esp), %al
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: daa
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_daa:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: daa # sched: [18:9.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_daa:
-; SLM: # %bb.0:
-; SLM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: daa # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_daa:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: daa # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_daa:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: daa # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_daa:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: daa # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_daa:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: daa # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_daa:
-; SKX: # %bb.0:
-; SKX-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: daa # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_daa:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: daa # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_daa:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: daa # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_daa:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: daa # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- %1 = tail call i8 asm "daa", "=r,r"(i8 %a0) nounwind
- ret i8 %1
-}
-
-define i8 @test_das(i8 %a0) optsize {
-; GENERIC-LABEL: test_das:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movb {{[0-9]+}}(%esp), %al
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: das
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_das:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: das # sched: [20:10.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_das:
-; SLM: # %bb.0:
-; SLM-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: das # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_das:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: das # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_das:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: das # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_das:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: das # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_das:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: das # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_das:
-; SKX: # %bb.0:
-; SKX-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: das # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_das:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: das # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_das:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: das # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_das:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: das # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- %1 = tail call i8 asm "das", "=r,r"(i8 %a0) nounwind
- ret i8 %1
-}
-
-define void @test_dec16(i16 %a0, i16* %a1) optsize {
-; GENERIC-LABEL: test_dec16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: decw %ax
-; GENERIC-NEXT: decw (%ecx)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_dec16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: decw %ax # sched: [1:0.50]
-; ATOM-NEXT: decw (%ecx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_dec16:
-; SLM: # %bb.0:
-; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: decw %ax # sched: [1:0.50]
-; SLM-NEXT: decw (%ecx) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_dec16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: decw %ax # sched: [1:0.33]
-; SANDY-NEXT: decw (%ecx) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_dec16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: decw %ax # sched: [1:0.25]
-; HASWELL-NEXT: decw (%ecx) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_dec16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: decw %ax # sched: [1:0.25]
-; BROADWELL-NEXT: decw (%ecx) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_dec16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: decw %ax # sched: [1:0.25]
-; SKYLAKE-NEXT: decw (%ecx) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_dec16:
-; SKX: # %bb.0:
-; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: decw %ax # sched: [1:0.25]
-; SKX-NEXT: decw (%ecx) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_dec16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: decw %ax # sched: [1:0.50]
-; BDVER2-NEXT: decw (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_dec16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: decw %ax # sched: [1:0.50]
-; BTVER2-NEXT: decw (%ecx) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_dec16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: decw %ax # sched: [1:0.25]
-; ZNVER1-NEXT: decw (%ecx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm "decw $0 \0A\09 decw $1", "r,*m"(i16 %a0, i16* %a1) nounwind
- ret void
-}
-define void @test_dec32(i32 %a0, i32* %a1) optsize {
-; GENERIC-LABEL: test_dec32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: decl %eax
-; GENERIC-NEXT: decl (%ecx)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_dec32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: decl %eax # sched: [1:0.50]
-; ATOM-NEXT: decl (%ecx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_dec32:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: decl %eax # sched: [1:0.50]
-; SLM-NEXT: decl (%ecx) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_dec32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: decl %eax # sched: [1:0.33]
-; SANDY-NEXT: decl (%ecx) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_dec32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: decl %eax # sched: [1:0.25]
-; HASWELL-NEXT: decl (%ecx) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_dec32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: decl %eax # sched: [1:0.25]
-; BROADWELL-NEXT: decl (%ecx) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_dec32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: decl %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: decl (%ecx) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_dec32:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: decl %eax # sched: [1:0.25]
-; SKX-NEXT: decl (%ecx) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_dec32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: decl %eax # sched: [1:0.50]
-; BDVER2-NEXT: decl (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_dec32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: decl %eax # sched: [1:0.50]
-; BTVER2-NEXT: decl (%ecx) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_dec32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: decl %eax # sched: [1:0.25]
-; ZNVER1-NEXT: decl (%ecx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm "decl $0 \0A\09 decl $1", "r,*m"(i32 %a0, i32* %a1) nounwind
- ret void
-}
-
-define void @test_inc16(i16 %a0, i16* %a1) optsize {
-; GENERIC-LABEL: test_inc16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: incw %ax
-; GENERIC-NEXT: incw (%ecx)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_inc16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: incw %ax # sched: [1:0.50]
-; ATOM-NEXT: incw (%ecx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_inc16:
-; SLM: # %bb.0:
-; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: incw %ax # sched: [1:0.50]
-; SLM-NEXT: incw (%ecx) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_inc16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: incw %ax # sched: [1:0.33]
-; SANDY-NEXT: incw (%ecx) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_inc16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: incw %ax # sched: [1:0.25]
-; HASWELL-NEXT: incw (%ecx) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_inc16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: incw %ax # sched: [1:0.25]
-; BROADWELL-NEXT: incw (%ecx) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_inc16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: incw %ax # sched: [1:0.25]
-; SKYLAKE-NEXT: incw (%ecx) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_inc16:
-; SKX: # %bb.0:
-; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: incw %ax # sched: [1:0.25]
-; SKX-NEXT: incw (%ecx) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_inc16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: incw %ax # sched: [1:0.50]
-; BDVER2-NEXT: incw (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_inc16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: incw %ax # sched: [1:0.50]
-; BTVER2-NEXT: incw (%ecx) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_inc16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: incw %ax # sched: [1:0.25]
-; ZNVER1-NEXT: incw (%ecx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm "incw $0 \0A\09 incw $1", "r,*m"(i16 %a0, i16* %a1) nounwind
- ret void
-}
-define void @test_inc32(i32 %a0, i32* %a1) optsize {
-; GENERIC-LABEL: test_inc32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: incl %eax
-; GENERIC-NEXT: incl (%ecx)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_inc32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: incl %eax # sched: [1:0.50]
-; ATOM-NEXT: incl (%ecx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_inc32:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: incl %eax # sched: [1:0.50]
-; SLM-NEXT: incl (%ecx) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_inc32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: incl %eax # sched: [1:0.33]
-; SANDY-NEXT: incl (%ecx) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_inc32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: incl %eax # sched: [1:0.25]
-; HASWELL-NEXT: incl (%ecx) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_inc32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: incl %eax # sched: [1:0.25]
-; BROADWELL-NEXT: incl (%ecx) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_inc32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: incl %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: incl (%ecx) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_inc32:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: incl %eax # sched: [1:0.25]
-; SKX-NEXT: incl (%ecx) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_inc32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: incl %eax # sched: [1:0.50]
-; BDVER2-NEXT: incl (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_inc32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: incl %eax # sched: [1:0.50]
-; BTVER2-NEXT: incl (%ecx) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_inc32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: incl %eax # sched: [1:0.25]
-; ZNVER1-NEXT: incl (%ecx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm "incl $0 \0A\09 incl $1", "r,*m"(i32 %a0, i32* %a1) nounwind
- ret void
-}
-
-define void @test_into() optsize {
-; GENERIC-LABEL: test_into:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: into
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_into:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: into # sched: [6:3.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_into:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: into # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_into:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: into # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_into:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: into # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_into:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: into # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_into:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: into # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_into:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: into # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_into:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: into # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_into:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: into # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_into:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: into # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- call void asm sideeffect "into", ""()
- ret void
-}
-
-; TODO - test_jmp
-
-define void @test_jcxz_jecxz() optsize {
-; GENERIC-LABEL: test_jcxz_jecxz:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: JXTGT:
-; GENERIC-NEXT: jcxz JXTGT
-; GENERIC-NEXT: jecxz JXTGT
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_jcxz_jecxz:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: JXTGT:
-; ATOM-NEXT: jcxz JXTGT # sched: [4:2.00]
-; ATOM-NEXT: jecxz JXTGT # sched: [4:2.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_jcxz_jecxz:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: JXTGT:
-; SLM-NEXT: jcxz JXTGT # sched: [1:1.00]
-; SLM-NEXT: jecxz JXTGT # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_jcxz_jecxz:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: JXTGT:
-; SANDY-NEXT: jcxz JXTGT # sched: [2:1.00]
-; SANDY-NEXT: jecxz JXTGT # sched: [2:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_jcxz_jecxz:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: JXTGT:
-; HASWELL-NEXT: jcxz JXTGT # sched: [2:0.50]
-; HASWELL-NEXT: jecxz JXTGT # sched: [2:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_jcxz_jecxz:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: JXTGT:
-; BROADWELL-NEXT: jcxz JXTGT # sched: [2:0.50]
-; BROADWELL-NEXT: jecxz JXTGT # sched: [2:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_jcxz_jecxz:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: JXTGT:
-; SKYLAKE-NEXT: jcxz JXTGT # sched: [2:0.50]
-; SKYLAKE-NEXT: jecxz JXTGT # sched: [2:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_jcxz_jecxz:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: JXTGT:
-; SKX-NEXT: jcxz JXTGT # sched: [2:0.50]
-; SKX-NEXT: jecxz JXTGT # sched: [2:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_jcxz_jecxz:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: JXTGT:
-; BDVER2-NEXT: jcxz JXTGT # sched: [1:1.00]
-; BDVER2-NEXT: jecxz JXTGT # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_jcxz_jecxz:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: JXTGT:
-; BTVER2-NEXT: jcxz JXTGT # sched: [1:0.50]
-; BTVER2-NEXT: jecxz JXTGT # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_jcxz_jecxz:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: JXTGT:
-; ZNVER1-NEXT: jcxz JXTGT # sched: [1:0.50]
-; ZNVER1-NEXT: jecxz JXTGT # sched: [1:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- call void asm sideeffect "JXTGT: \0A\09 jcxz JXTGT \0A\09 jecxz JXTGT", ""()
- ret void
-}
-
-; TODO - test_lds
-
-define void @test_leave() optsize {
-; GENERIC-LABEL: test_leave:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: leave
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_leave:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: leave # sched: [2:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_leave:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: leave # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_leave:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: leave # sched: [7:0.67]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_leave:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: leave # sched: [7:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_leave:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: leave # sched: [7:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_leave:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: leave # sched: [7:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_leave:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: leave # sched: [7:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_leave:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: leave # sched: [1:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_leave:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: leave # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_leave:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: leave # sched: [8:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm "leave", ""() nounwind
- ret void
-}
-
-; TODO - test_les
-
-define void @test_pop_push() optsize {
-; GENERIC-LABEL: test_pop_push:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: popl %ds
-; GENERIC-NEXT: popl %es
-; GENERIC-NEXT: popl %ss
-; GENERIC-NEXT: popl %fs
-; GENERIC-NEXT: popl %gs
-; GENERIC-NEXT: pushl %cs
-; GENERIC-NEXT: pushl %ds
-; GENERIC-NEXT: pushl %es
-; GENERIC-NEXT: pushl %ss
-; GENERIC-NEXT: pushl %fs
-; GENERIC-NEXT: pushl %gs
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_pop_push:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: popl %ds # sched: [29:14.50]
-; ATOM-NEXT: popl %es # sched: [29:14.50]
-; ATOM-NEXT: popl %ss # sched: [48:24.00]
-; ATOM-NEXT: popl %fs # sched: [29:14.50]
-; ATOM-NEXT: popl %gs # sched: [29:14.50]
-; ATOM-NEXT: pushl %cs # sched: [2:1.00]
-; ATOM-NEXT: pushl %ds # sched: [2:1.00]
-; ATOM-NEXT: pushl %es # sched: [2:1.00]
-; ATOM-NEXT: pushl %ss # sched: [2:1.00]
-; ATOM-NEXT: pushl %fs # sched: [2:1.00]
-; ATOM-NEXT: pushl %gs # sched: [2:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_pop_push:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: popl %ds # sched: [100:1.00]
-; SLM-NEXT: popl %es # sched: [100:1.00]
-; SLM-NEXT: popl %ss # sched: [100:1.00]
-; SLM-NEXT: popl %fs # sched: [100:1.00]
-; SLM-NEXT: popl %gs # sched: [100:1.00]
-; SLM-NEXT: pushl %cs # sched: [100:1.00]
-; SLM-NEXT: pushl %ds # sched: [100:1.00]
-; SLM-NEXT: pushl %es # sched: [100:1.00]
-; SLM-NEXT: pushl %ss # sched: [100:1.00]
-; SLM-NEXT: pushl %fs # sched: [100:1.00]
-; SLM-NEXT: pushl %gs # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pop_push:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: popl %ds # sched: [100:0.33]
-; SANDY-NEXT: popl %es # sched: [100:0.33]
-; SANDY-NEXT: popl %ss # sched: [100:0.33]
-; SANDY-NEXT: popl %fs # sched: [100:0.33]
-; SANDY-NEXT: popl %gs # sched: [100:0.33]
-; SANDY-NEXT: pushl %cs # sched: [100:0.33]
-; SANDY-NEXT: pushl %ds # sched: [100:0.33]
-; SANDY-NEXT: pushl %es # sched: [100:0.33]
-; SANDY-NEXT: pushl %ss # sched: [100:0.33]
-; SANDY-NEXT: pushl %fs # sched: [100:0.33]
-; SANDY-NEXT: pushl %gs # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_pop_push:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: popl %ds # sched: [100:0.25]
-; HASWELL-NEXT: popl %es # sched: [100:0.25]
-; HASWELL-NEXT: popl %ss # sched: [100:0.25]
-; HASWELL-NEXT: popl %fs # sched: [100:0.25]
-; HASWELL-NEXT: popl %gs # sched: [100:0.25]
-; HASWELL-NEXT: pushl %cs # sched: [100:0.25]
-; HASWELL-NEXT: pushl %ds # sched: [100:0.25]
-; HASWELL-NEXT: pushl %es # sched: [100:0.25]
-; HASWELL-NEXT: pushl %ss # sched: [100:0.25]
-; HASWELL-NEXT: pushl %fs # sched: [100:0.25]
-; HASWELL-NEXT: pushl %gs # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pop_push:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: popl %ds # sched: [100:0.25]
-; BROADWELL-NEXT: popl %es # sched: [100:0.25]
-; BROADWELL-NEXT: popl %ss # sched: [100:0.25]
-; BROADWELL-NEXT: popl %fs # sched: [100:0.25]
-; BROADWELL-NEXT: popl %gs # sched: [100:0.25]
-; BROADWELL-NEXT: pushl %cs # sched: [100:0.25]
-; BROADWELL-NEXT: pushl %ds # sched: [100:0.25]
-; BROADWELL-NEXT: pushl %es # sched: [100:0.25]
-; BROADWELL-NEXT: pushl %ss # sched: [100:0.25]
-; BROADWELL-NEXT: pushl %fs # sched: [100:0.25]
-; BROADWELL-NEXT: pushl %gs # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_pop_push:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: popl %ds # sched: [100:0.25]
-; SKYLAKE-NEXT: popl %es # sched: [100:0.25]
-; SKYLAKE-NEXT: popl %ss # sched: [100:0.25]
-; SKYLAKE-NEXT: popl %fs # sched: [100:0.25]
-; SKYLAKE-NEXT: popl %gs # sched: [100:0.25]
-; SKYLAKE-NEXT: pushl %cs # sched: [100:0.25]
-; SKYLAKE-NEXT: pushl %ds # sched: [100:0.25]
-; SKYLAKE-NEXT: pushl %es # sched: [100:0.25]
-; SKYLAKE-NEXT: pushl %ss # sched: [100:0.25]
-; SKYLAKE-NEXT: pushl %fs # sched: [100:0.25]
-; SKYLAKE-NEXT: pushl %gs # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_pop_push:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: popl %ds # sched: [100:0.25]
-; SKX-NEXT: popl %es # sched: [100:0.25]
-; SKX-NEXT: popl %ss # sched: [100:0.25]
-; SKX-NEXT: popl %fs # sched: [100:0.25]
-; SKX-NEXT: popl %gs # sched: [100:0.25]
-; SKX-NEXT: pushl %cs # sched: [100:0.25]
-; SKX-NEXT: pushl %ds # sched: [100:0.25]
-; SKX-NEXT: pushl %es # sched: [100:0.25]
-; SKX-NEXT: pushl %ss # sched: [100:0.25]
-; SKX-NEXT: pushl %fs # sched: [100:0.25]
-; SKX-NEXT: pushl %gs # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_pop_push:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: popl %ds # sched: [100:0.50]
-; BDVER2-NEXT: popl %es # sched: [100:0.50]
-; BDVER2-NEXT: popl %ss # sched: [100:0.50]
-; BDVER2-NEXT: popl %fs # sched: [100:0.50]
-; BDVER2-NEXT: popl %gs # sched: [100:0.50]
-; BDVER2-NEXT: pushl %cs # sched: [100:0.50]
-; BDVER2-NEXT: pushl %ds # sched: [100:0.50]
-; BDVER2-NEXT: pushl %es # sched: [100:0.50]
-; BDVER2-NEXT: pushl %ss # sched: [100:0.50]
-; BDVER2-NEXT: pushl %fs # sched: [100:0.50]
-; BDVER2-NEXT: pushl %gs # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pop_push:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: popl %ds # sched: [100:0.50]
-; BTVER2-NEXT: popl %es # sched: [100:0.50]
-; BTVER2-NEXT: popl %ss # sched: [100:0.50]
-; BTVER2-NEXT: popl %fs # sched: [100:0.50]
-; BTVER2-NEXT: popl %gs # sched: [100:0.50]
-; BTVER2-NEXT: pushl %cs # sched: [100:0.50]
-; BTVER2-NEXT: pushl %ds # sched: [100:0.50]
-; BTVER2-NEXT: pushl %es # sched: [100:0.50]
-; BTVER2-NEXT: pushl %ss # sched: [100:0.50]
-; BTVER2-NEXT: pushl %fs # sched: [100:0.50]
-; BTVER2-NEXT: pushl %gs # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pop_push:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: popl %ds # sched: [100:0.25]
-; ZNVER1-NEXT: popl %es # sched: [100:0.25]
-; ZNVER1-NEXT: popl %ss # sched: [100:0.25]
-; ZNVER1-NEXT: popl %fs # sched: [100:0.25]
-; ZNVER1-NEXT: popl %gs # sched: [100:0.25]
-; ZNVER1-NEXT: pushl %cs # sched: [100:0.25]
-; ZNVER1-NEXT: pushl %ds # sched: [100:0.25]
-; ZNVER1-NEXT: pushl %es # sched: [100:0.25]
-; ZNVER1-NEXT: pushl %ss # sched: [100:0.25]
-; ZNVER1-NEXT: pushl %fs # sched: [100:0.25]
-; ZNVER1-NEXT: pushl %gs # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- call void asm sideeffect "pop %DS \0A\09 pop %ES \0A\09 pop %SS \0A\09 pop %FS \0A\09 pop %GS \0A\09 push %CS \0A\09 push %DS \0A\09 push %ES \0A\09 push %SS \0A\09 push %FS \0A\09 push %GS", ""()
- ret void
-}
-define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize {
-; GENERIC-LABEL: test_pop_push_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: popw %ax
-; GENERIC-NEXT: popw (%ecx)
-; GENERIC-NEXT: pushw %ax
-; GENERIC-NEXT: pushw (%ecx)
-; GENERIC-NEXT: pushw $4095 # imm = 0xFFF
-; GENERIC-NEXT: pushw $7
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_pop_push_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: popw %ax # sched: [2:1.00]
-; ATOM-NEXT: popw (%ecx) # sched: [3:1.50]
-; ATOM-NEXT: pushw %ax # sched: [1:1.00]
-; ATOM-NEXT: pushw (%ecx) # sched: [2:1.00]
-; ATOM-NEXT: pushw $4095 # imm = 0xFFF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: pushw $7 # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_pop_push_16:
-; SLM: # %bb.0:
-; SLM-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: popw %ax # sched: [3:1.00]
-; SLM-NEXT: popw (%ecx) # sched: [4:2.00]
-; SLM-NEXT: pushw %ax # sched: [1:1.00]
-; SLM-NEXT: pushw (%ecx) # sched: [4:2.00]
-; SLM-NEXT: pushw $4095 # imm = 0xFFF
-; SLM-NEXT: # sched: [1:1.00]
-; SLM-NEXT: pushw $7 # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pop_push_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: popw %ax # sched: [6:0.50]
-; SANDY-NEXT: popw (%ecx) # sched: [6:0.50]
-; SANDY-NEXT: pushw %ax # sched: [5:1.00]
-; SANDY-NEXT: pushw (%ecx) # sched: [5:1.00]
-; SANDY-NEXT: pushw $4095 # imm = 0xFFF
-; SANDY-NEXT: # sched: [1:1.00]
-; SANDY-NEXT: pushw $7 # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_pop_push_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: popw %ax # sched: [6:0.50]
-; HASWELL-NEXT: popw (%ecx) # sched: [7:1.00]
-; HASWELL-NEXT: pushw %ax # sched: [2:1.00]
-; HASWELL-NEXT: pushw (%ecx) # sched: [7:1.00]
-; HASWELL-NEXT: pushw $4095 # imm = 0xFFF
-; HASWELL-NEXT: # sched: [1:1.00]
-; HASWELL-NEXT: pushw $7 # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pop_push_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: popw %ax # sched: [6:0.50]
-; BROADWELL-NEXT: popw (%ecx) # sched: [6:1.00]
-; BROADWELL-NEXT: pushw %ax # sched: [2:1.00]
-; BROADWELL-NEXT: pushw (%ecx) # sched: [6:1.00]
-; BROADWELL-NEXT: pushw $4095 # imm = 0xFFF
-; BROADWELL-NEXT: # sched: [1:1.00]
-; BROADWELL-NEXT: pushw $7 # sched: [1:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_pop_push_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: popw %ax # sched: [6:0.50]
-; SKYLAKE-NEXT: popw (%ecx) # sched: [6:1.00]
-; SKYLAKE-NEXT: pushw %ax # sched: [2:1.00]
-; SKYLAKE-NEXT: pushw (%ecx) # sched: [6:1.00]
-; SKYLAKE-NEXT: pushw $4095 # imm = 0xFFF
-; SKYLAKE-NEXT: # sched: [1:1.00]
-; SKYLAKE-NEXT: pushw $7 # sched: [1:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_pop_push_16:
-; SKX: # %bb.0:
-; SKX-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: popw %ax # sched: [6:0.50]
-; SKX-NEXT: popw (%ecx) # sched: [6:1.00]
-; SKX-NEXT: pushw %ax # sched: [2:1.00]
-; SKX-NEXT: pushw (%ecx) # sched: [6:1.00]
-; SKX-NEXT: pushw $4095 # imm = 0xFFF
-; SKX-NEXT: # sched: [1:1.00]
-; SKX-NEXT: pushw $7 # sched: [1:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_pop_push_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: popw %ax # sched: [5:0.50]
-; BDVER2-NEXT: popw (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: pushw %ax # sched: [1:1.00]
-; BDVER2-NEXT: pushw (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: pushw $4095 # imm = 0xFFF
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: pushw $7 # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pop_push_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: popw %ax # sched: [3:1.00]
-; BTVER2-NEXT: popw (%ecx) # sched: [4:1.00]
-; BTVER2-NEXT: pushw %ax # sched: [1:1.00]
-; BTVER2-NEXT: pushw (%ecx) # sched: [4:1.00]
-; BTVER2-NEXT: pushw $4095 # imm = 0xFFF
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: pushw $7 # sched: [1:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pop_push_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: popw %ax # sched: [8:0.50]
-; ZNVER1-NEXT: popw (%ecx) # sched: [5:0.50]
-; ZNVER1-NEXT: pushw %ax # sched: [1:0.50]
-; ZNVER1-NEXT: pushw (%ecx) # sched: [4:0.50]
-; ZNVER1-NEXT: pushw $4095 # imm = 0xFFF
-; ZNVER1-NEXT: # sched: [1:0.50]
-; ZNVER1-NEXT: pushw $7 # sched: [1:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- %1 = call i16 asm sideeffect "popw $0 \0A\09 popw $2 \0A\09 pushw $1 \0A\09 pushw $2 \0A\09 pushw $3 \0A\09 pushw $4", "=r,r,*m,i,i"(i16 %a0, i16 *%a1, i16 4095, i8 7)
- ret i16 %1
-}
-define i32 @test_pop_push_32(i32 %a0, i32 *%a1) optsize {
-; GENERIC-LABEL: test_pop_push_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: popl %eax
-; GENERIC-NEXT: popl (%ecx)
-; GENERIC-NEXT: pushl %eax
-; GENERIC-NEXT: pushl (%ecx)
-; GENERIC-NEXT: pushl $4095 # imm = 0xFFF
-; GENERIC-NEXT: pushl $7
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_pop_push_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: popl %eax # sched: [1:1.00]
-; ATOM-NEXT: popl (%ecx) # sched: [3:1.50]
-; ATOM-NEXT: pushl %eax # sched: [1:1.00]
-; ATOM-NEXT: pushl (%ecx) # sched: [2:1.00]
-; ATOM-NEXT: pushl $4095 # imm = 0xFFF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: pushl $7 # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_pop_push_32:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: popl %eax # sched: [3:1.00]
-; SLM-NEXT: popl (%ecx) # sched: [4:2.00]
-; SLM-NEXT: pushl %eax # sched: [1:1.00]
-; SLM-NEXT: pushl (%ecx) # sched: [4:2.00]
-; SLM-NEXT: pushl $4095 # imm = 0xFFF
-; SLM-NEXT: # sched: [1:1.00]
-; SLM-NEXT: pushl $7 # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pop_push_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: popl %eax # sched: [6:0.50]
-; SANDY-NEXT: popl (%ecx) # sched: [6:0.50]
-; SANDY-NEXT: pushl %eax # sched: [5:1.00]
-; SANDY-NEXT: pushl (%ecx) # sched: [5:1.00]
-; SANDY-NEXT: pushl $4095 # imm = 0xFFF
-; SANDY-NEXT: # sched: [1:1.00]
-; SANDY-NEXT: pushl $7 # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_pop_push_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: popl %eax # sched: [6:0.50]
-; HASWELL-NEXT: popl (%ecx) # sched: [7:1.00]
-; HASWELL-NEXT: pushl %eax # sched: [2:1.00]
-; HASWELL-NEXT: pushl (%ecx) # sched: [7:1.00]
-; HASWELL-NEXT: pushl $4095 # imm = 0xFFF
-; HASWELL-NEXT: # sched: [1:1.00]
-; HASWELL-NEXT: pushl $7 # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pop_push_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: popl %eax # sched: [6:0.50]
-; BROADWELL-NEXT: popl (%ecx) # sched: [6:1.00]
-; BROADWELL-NEXT: pushl %eax # sched: [2:1.00]
-; BROADWELL-NEXT: pushl (%ecx) # sched: [6:1.00]
-; BROADWELL-NEXT: pushl $4095 # imm = 0xFFF
-; BROADWELL-NEXT: # sched: [1:1.00]
-; BROADWELL-NEXT: pushl $7 # sched: [1:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_pop_push_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: popl %eax # sched: [6:0.50]
-; SKYLAKE-NEXT: popl (%ecx) # sched: [6:1.00]
-; SKYLAKE-NEXT: pushl %eax # sched: [2:1.00]
-; SKYLAKE-NEXT: pushl (%ecx) # sched: [6:1.00]
-; SKYLAKE-NEXT: pushl $4095 # imm = 0xFFF
-; SKYLAKE-NEXT: # sched: [1:1.00]
-; SKYLAKE-NEXT: pushl $7 # sched: [1:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_pop_push_32:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: popl %eax # sched: [6:0.50]
-; SKX-NEXT: popl (%ecx) # sched: [6:1.00]
-; SKX-NEXT: pushl %eax # sched: [2:1.00]
-; SKX-NEXT: pushl (%ecx) # sched: [6:1.00]
-; SKX-NEXT: pushl $4095 # imm = 0xFFF
-; SKX-NEXT: # sched: [1:1.00]
-; SKX-NEXT: pushl $7 # sched: [1:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_pop_push_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: popl %eax # sched: [5:0.50]
-; BDVER2-NEXT: popl (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: pushl %eax # sched: [1:1.00]
-; BDVER2-NEXT: pushl (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: pushl $4095 # imm = 0xFFF
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: pushl $7 # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pop_push_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: popl %eax # sched: [3:1.00]
-; BTVER2-NEXT: popl (%ecx) # sched: [4:1.00]
-; BTVER2-NEXT: pushl %eax # sched: [1:1.00]
-; BTVER2-NEXT: pushl (%ecx) # sched: [4:1.00]
-; BTVER2-NEXT: pushl $4095 # imm = 0xFFF
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: pushl $7 # sched: [1:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pop_push_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: popl %eax # sched: [8:0.50]
-; ZNVER1-NEXT: popl (%ecx) # sched: [9:1.00]
-; ZNVER1-NEXT: pushl %eax # sched: [1:0.50]
-; ZNVER1-NEXT: pushl (%ecx) # sched: [4:0.50]
-; ZNVER1-NEXT: pushl $4095 # imm = 0xFFF
-; ZNVER1-NEXT: # sched: [1:0.50]
-; ZNVER1-NEXT: pushl $7 # sched: [1:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- %1 = call i32 asm sideeffect "popl $0 \0A\09 popl $2 \0A\09 pushl $1 \0A\09 pushl $2 \0A\09 pushl $3 \0A\09 pushl $4", "=r,r,*m,i,i"(i32 %a0, i32 *%a1, i32 4095, i8 7)
- ret i32 %1
-}
-
-define void @test_popa_popf_pusha_pushf() optsize {
-; GENERIC-LABEL: test_popa_popf_pusha_pushf:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: popal
-; GENERIC-NEXT: popfl
-; GENERIC-NEXT: pushal
-; GENERIC-NEXT: pushfl
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_popa_popf_pusha_pushf:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: popal # sched: [9:4.50]
-; ATOM-NEXT: popfl # sched: [26:13.00]
-; ATOM-NEXT: pushal # sched: [8:4.00]
-; ATOM-NEXT: pushfl # sched: [9:4.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_popa_popf_pusha_pushf:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: popal # sched: [3:1.00]
-; SLM-NEXT: popfl # sched: [3:1.00]
-; SLM-NEXT: pushal # sched: [1:1.00]
-; SLM-NEXT: pushfl # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_popa_popf_pusha_pushf:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: popal # sched: [5:0.50]
-; SANDY-NEXT: popfl # sched: [5:0.50]
-; SANDY-NEXT: pushal # sched: [1:1.00]
-; SANDY-NEXT: pushfl # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_popa_popf_pusha_pushf:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: popal # sched: [1:4.50]
-; HASWELL-NEXT: popfl # sched: [5:0.50]
-; HASWELL-NEXT: pushal # sched: [1:4.75]
-; HASWELL-NEXT: pushfl # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_popa_popf_pusha_pushf:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: popal # sched: [5:0.50]
-; BROADWELL-NEXT: popfl # sched: [5:0.50]
-; BROADWELL-NEXT: pushal # sched: [1:1.00]
-; BROADWELL-NEXT: pushfl # sched: [1:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_popa_popf_pusha_pushf:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: popal # sched: [5:0.50]
-; SKYLAKE-NEXT: popfl # sched: [5:0.50]
-; SKYLAKE-NEXT: pushal # sched: [1:1.00]
-; SKYLAKE-NEXT: pushfl # sched: [1:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_popa_popf_pusha_pushf:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: popal # sched: [5:0.50]
-; SKX-NEXT: popfl # sched: [5:0.50]
-; SKX-NEXT: pushal # sched: [1:1.00]
-; SKX-NEXT: pushfl # sched: [1:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_popa_popf_pusha_pushf:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: popal # sched: [5:0.50]
-; BDVER2-NEXT: popfl # sched: [5:0.50]
-; BDVER2-NEXT: pushal # sched: [1:1.00]
-; BDVER2-NEXT: pushfl # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_popa_popf_pusha_pushf:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: popal # sched: [3:1.00]
-; BTVER2-NEXT: popfl # sched: [3:1.00]
-; BTVER2-NEXT: pushal # sched: [1:1.00]
-; BTVER2-NEXT: pushfl # sched: [1:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_popa_popf_pusha_pushf:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: popal # sched: [100:0.25]
-; ZNVER1-NEXT: popfl # sched: [100:0.25]
-; ZNVER1-NEXT: pushal # sched: [8:0.50]
-; ZNVER1-NEXT: pushfl # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- call void asm sideeffect "popa \0A\09 popf \0A\09 pusha \0A\09 pushf", ""()
- ret void
-}
-
-define void @test_ret() optsize {
-; GENERIC-LABEL: test_ret:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: retl
-; GENERIC-NEXT: retl $4095 # imm = 0xFFF
-; GENERIC-NEXT: lretl
-; GENERIC-NEXT: lretl $4095 # imm = 0xFFF
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_ret:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-; ATOM-NEXT: retl $4095 # imm = 0xFFF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: lretl # sched: [79:39.50]
-; ATOM-NEXT: lretl $4095 # imm = 0xFFF
-; ATOM-NEXT: # sched: [79:39.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_ret:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: retl # sched: [4:1.00]
-; SLM-NEXT: retl $4095 # imm = 0xFFF
-; SLM-NEXT: # sched: [4:1.00]
-; SLM-NEXT: lretl # sched: [4:1.00]
-; SLM-NEXT: lretl $4095 # imm = 0xFFF
-; SLM-NEXT: # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_ret:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-; SANDY-NEXT: retl $4095 # imm = 0xFFF
-; SANDY-NEXT: # sched: [6:1.00]
-; SANDY-NEXT: lretl # sched: [6:1.00]
-; SANDY-NEXT: lretl $4095 # imm = 0xFFF
-; SANDY-NEXT: # sched: [6:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_ret:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-; HASWELL-NEXT: retl $4095 # imm = 0xFFF
-; HASWELL-NEXT: # sched: [1:2.00]
-; HASWELL-NEXT: lretl # sched: [6:0.50]
-; HASWELL-NEXT: lretl $4095 # imm = 0xFFF
-; HASWELL-NEXT: # sched: [1:2.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ret:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-; BROADWELL-NEXT: retl $4095 # imm = 0xFFF
-; BROADWELL-NEXT: # sched: [6:0.50]
-; BROADWELL-NEXT: lretl # sched: [6:0.50]
-; BROADWELL-NEXT: lretl $4095 # imm = 0xFFF
-; BROADWELL-NEXT: # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_ret:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-; SKYLAKE-NEXT: retl $4095 # imm = 0xFFF
-; SKYLAKE-NEXT: # sched: [6:0.50]
-; SKYLAKE-NEXT: lretl # sched: [6:0.50]
-; SKYLAKE-NEXT: lretl $4095 # imm = 0xFFF
-; SKYLAKE-NEXT: # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_ret:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: retl # sched: [6:0.50]
-; SKX-NEXT: retl $4095 # imm = 0xFFF
-; SKX-NEXT: # sched: [6:0.50]
-; SKX-NEXT: lretl # sched: [6:0.50]
-; SKX-NEXT: lretl $4095 # imm = 0xFFF
-; SKX-NEXT: # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_ret:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-; BDVER2-NEXT: retl $4095 # imm = 0xFFF
-; BDVER2-NEXT: # sched: [5:1.00]
-; BDVER2-NEXT: lretl # sched: [5:1.00]
-; BDVER2-NEXT: lretl $4095 # imm = 0xFFF
-; BDVER2-NEXT: # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_ret:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-; BTVER2-NEXT: retl $4095 # imm = 0xFFF
-; BTVER2-NEXT: # sched: [4:1.00]
-; BTVER2-NEXT: lretl # sched: [4:1.00]
-; BTVER2-NEXT: lretl $4095 # imm = 0xFFF
-; BTVER2-NEXT: # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ret:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
-; ZNVER1-NEXT: retl $4095 # imm = 0xFFF
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: lretl # sched: [1:0.50]
-; ZNVER1-NEXT: lretl $4095 # imm = 0xFFF
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- call void asm sideeffect "ret \0A\09 ret $0 \0A\09 lret \0A\09 lret $0", "i"(i16 4095)
- ret void
-}
-
-define i8 @test_salc() optsize {
-; GENERIC-LABEL: test_salc:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: salc
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_salc:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: salc # sched: [1:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_salc:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: salc # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_salc:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: salc # sched: [1:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_salc:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: salc # sched: [1:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_salc:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: salc # sched: [1:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_salc:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: salc # sched: [1:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_salc:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: salc # sched: [1:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_salc:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: salc # sched: [1:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_salc:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: salc # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_salc:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: salc # sched: [1:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- %1 = tail call i8 asm "salc", "=r"() nounwind
- ret i8 %1
-}
-
-; TODO - test_sgdt
-; TODO - test_sidt
-
-define void @test_xchg_32(i32 %a0, i32 %a1, i32 *%a2) optsize {
-; GENERIC-LABEL: test_xchg_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: xchgl %eax, %eax
-; GENERIC-NEXT: xchgl %ecx, %eax
-; GENERIC-NEXT: xchgl %eax, (%edx)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_xchg_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: xchgl %eax, %eax # sched: [2:1.00]
-; ATOM-NEXT: xchgl %ecx, %eax # sched: [2:1.00]
-; ATOM-NEXT: xchgl %eax, (%edx) # sched: [3:1.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_xchg_32:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: xchgl %eax, %eax # sched: [1:0.50]
-; SLM-NEXT: xchgl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT: xchgl %eax, (%edx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_xchg_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: xchgl %eax, %eax # sched: [2:1.00]
-; SANDY-NEXT: xchgl %ecx, %eax # sched: [2:1.00]
-; SANDY-NEXT: xchgl %eax, (%edx) # sched: [6:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_xchg_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: xchgl %eax, %eax # sched: [2:0.75]
-; HASWELL-NEXT: xchgl %ecx, %eax # sched: [2:0.75]
-; HASWELL-NEXT: xchgl %eax, (%edx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xchg_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: xchgl %eax, %eax # sched: [2:0.75]
-; BROADWELL-NEXT: xchgl %ecx, %eax # sched: [2:0.75]
-; BROADWELL-NEXT: xchgl %eax, (%edx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_xchg_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: xchgl %eax, %eax # sched: [2:0.75]
-; SKYLAKE-NEXT: xchgl %ecx, %eax # sched: [2:0.75]
-; SKYLAKE-NEXT: xchgl %eax, (%edx) # sched: [10:1.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_xchg_32:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: xchgl %eax, %eax # sched: [2:0.75]
-; SKX-NEXT: xchgl %ecx, %eax # sched: [2:0.75]
-; SKX-NEXT: xchgl %eax, (%edx) # sched: [10:1.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_xchg_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: xchgl %eax, %eax # sched: [1:1.00]
-; BDVER2-NEXT: xchgl %ecx, %eax # sched: [1:1.00]
-; BDVER2-NEXT: xchgl %eax, (%edx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xchg_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: xchgl %eax, %eax # sched: [1:0.50]
-; BTVER2-NEXT: xchgl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: xchgl %eax, (%edx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xchg_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: xchgl %eax, %eax # sched: [1:0.50]
-; ZNVER1-NEXT: xchgl %ecx, %eax # sched: [1:0.50]
-; ZNVER1-NEXT: xchgl %eax, (%edx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm "xchg %EAX, $0 \0A\09 xchg $1, $0 \0A\09 xchg $2, $0", "r,r,*m"(i32 %a0, i32 %a1, i32 *%a2) nounwind
- ret void
-}
diff --git a/llvm/test/CodeGen/X86/schedule-x86_64.ll b/llvm/test/CodeGen/X86/schedule-x86_64.ll
deleted file mode 100644
index 6d9f4241afc..00000000000
--- a/llvm/test/CodeGen/X86/schedule-x86_64.ll
+++ /dev/null
@@ -1,18893 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define void @test_adc_8(i8 %a0, i8* %a1, i8 %a2) optsize {
-; GENERIC-LABEL: test_adc_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: adcb $7, %al # sched: [2:0.67]
-; GENERIC-NEXT: adcb $7, %dil # sched: [2:0.67]
-; GENERIC-NEXT: adcb $7, (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: adcb %dl, %dil # sched: [2:0.67]
-; GENERIC-NEXT: adcb %dil, (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: adcb (%rsi), %dil # sched: [7:0.67]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_adc_8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: adcb $7, %al # sched: [1:0.50]
-; ATOM-NEXT: adcb $7, %dil # sched: [1:0.50]
-; ATOM-NEXT: adcb $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: adcb %dl, %dil # sched: [1:0.50]
-; ATOM-NEXT: adcb %dil, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: adcb (%rsi), %dil # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_adc_8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: adcb $7, %al # sched: [1:0.50]
-; SLM-NEXT: adcb $7, %dil # sched: [1:0.50]
-; SLM-NEXT: adcb $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: adcb %dl, %dil # sched: [1:0.50]
-; SLM-NEXT: adcb %dil, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: adcb (%rsi), %dil # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_adc_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: adcb $7, %al # sched: [2:0.67]
-; SANDY-NEXT: adcb $7, %dil # sched: [2:0.67]
-; SANDY-NEXT: adcb $7, (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: adcb %dl, %dil # sched: [2:0.67]
-; SANDY-NEXT: adcb %dil, (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: adcb (%rsi), %dil # sched: [7:0.67]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_adc_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: adcb $7, %al # sched: [2:0.50]
-; HASWELL-NEXT: adcb $7, %dil # sched: [2:0.50]
-; HASWELL-NEXT: adcb $7, (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: adcb %dl, %dil # sched: [2:0.50]
-; HASWELL-NEXT: adcb %dil, (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: adcb (%rsi), %dil # sched: [7:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_adc_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: adcb $7, %al # sched: [2:0.50]
-; BROADWELL-NEXT: adcb $7, %dil # sched: [2:0.50]
-; BROADWELL-NEXT: adcb $7, (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: adcb %dl, %dil # sched: [1:0.50]
-; BROADWELL-NEXT: adcb %dil, (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: adcb (%rsi), %dil # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_adc_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: adcb $7, %al # sched: [2:0.50]
-; SKYLAKE-NEXT: adcb $7, %dil # sched: [2:0.50]
-; SKYLAKE-NEXT: adcb $7, (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: adcb %dl, %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: adcb %dil, (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: adcb (%rsi), %dil # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_adc_8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: adcb $7, %al # sched: [2:0.50]
-; SKX-NEXT: adcb $7, %dil # sched: [2:0.50]
-; SKX-NEXT: adcb $7, (%rsi) # sched: [8:1.00]
-; SKX-NEXT: adcb %dl, %dil # sched: [1:0.50]
-; SKX-NEXT: adcb %dil, (%rsi) # sched: [8:1.00]
-; SKX-NEXT: adcb (%rsi), %dil # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_adc_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: adcb $7, %al # sched: [1:1.00]
-; BDVER2-NEXT: adcb $7, %dil # sched: [1:1.00]
-; BDVER2-NEXT: adcb $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: adcb %dl, %dil # sched: [1:1.00]
-; BDVER2-NEXT: adcb %dil, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: adcb (%rsi), %dil # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_adc_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: adcb $7, %al # sched: [1:1.00]
-; BTVER2-NEXT: adcb $7, %dil # sched: [1:1.00]
-; BTVER2-NEXT: adcb $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: adcb %dl, %dil # sched: [1:1.00]
-; BTVER2-NEXT: adcb %dil, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: adcb (%rsi), %dil # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_adc_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: adcb $7, %al # sched: [1:0.25]
-; ZNVER1-NEXT: adcb $7, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: adcb $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: adcb %dl, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: adcb %dil, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: adcb (%rsi), %dil # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "adcb $3, %AL \0A\09 adcb $3, $0 \0A\09 adcb $3, $2 \0A\09 adcb $1, $0 \0A\09 adcb $0, $2 \0A\09 adcb $2, $0", "r,r,*m,i"(i8 %a0, i8 %a2, i8* %a1, i8 7) nounwind
- ret void
-}
-define void @test_adc_16(i16 %a0, i16* %a1, i16 %a2) optsize {
-; GENERIC-LABEL: test_adc_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: adcw $511, %ax # imm = 0x1FF
-; GENERIC-NEXT: # sched: [2:0.67]
-; GENERIC-NEXT: adcw $511, %di # imm = 0x1FF
-; GENERIC-NEXT: # sched: [2:0.67]
-; GENERIC-NEXT: adcw $511, (%rsi) # imm = 0x1FF
-; GENERIC-NEXT: # sched: [9:1.00]
-; GENERIC-NEXT: adcw $7, %di # sched: [2:0.67]
-; GENERIC-NEXT: adcw $7, (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: adcw %dx, %di # sched: [2:0.67]
-; GENERIC-NEXT: adcw %di, (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: adcw (%rsi), %di # sched: [7:0.67]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_adc_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: adcw $511, %ax # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: adcw $511, %di # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: adcw $511, (%rsi) # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: adcw $7, %di # sched: [1:0.50]
-; ATOM-NEXT: adcw $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: adcw %dx, %di # sched: [1:0.50]
-; ATOM-NEXT: adcw %di, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: adcw (%rsi), %di # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_adc_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: adcw $511, %ax # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: adcw $511, %di # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: adcw $511, (%rsi) # imm = 0x1FF
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: adcw $7, %di # sched: [1:0.50]
-; SLM-NEXT: adcw $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: adcw %dx, %di # sched: [1:0.50]
-; SLM-NEXT: adcw %di, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: adcw (%rsi), %di # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_adc_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: adcw $511, %ax # imm = 0x1FF
-; SANDY-NEXT: # sched: [2:0.67]
-; SANDY-NEXT: adcw $511, %di # imm = 0x1FF
-; SANDY-NEXT: # sched: [2:0.67]
-; SANDY-NEXT: adcw $511, (%rsi) # imm = 0x1FF
-; SANDY-NEXT: # sched: [9:1.00]
-; SANDY-NEXT: adcw $7, %di # sched: [2:0.67]
-; SANDY-NEXT: adcw $7, (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: adcw %dx, %di # sched: [2:0.67]
-; SANDY-NEXT: adcw %di, (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: adcw (%rsi), %di # sched: [7:0.67]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_adc_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: adcw $511, %ax # imm = 0x1FF
-; HASWELL-NEXT: # sched: [2:0.50]
-; HASWELL-NEXT: adcw $511, %di # imm = 0x1FF
-; HASWELL-NEXT: # sched: [2:0.50]
-; HASWELL-NEXT: adcw $511, (%rsi) # imm = 0x1FF
-; HASWELL-NEXT: # sched: [9:1.00]
-; HASWELL-NEXT: adcw $7, %di # sched: [2:0.50]
-; HASWELL-NEXT: adcw $7, (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: adcw %dx, %di # sched: [2:0.50]
-; HASWELL-NEXT: adcw %di, (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: adcw (%rsi), %di # sched: [7:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_adc_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: adcw $511, %ax # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.50]
-; BROADWELL-NEXT: adcw $511, %di # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.50]
-; BROADWELL-NEXT: adcw $511, (%rsi) # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [8:1.00]
-; BROADWELL-NEXT: adcw $7, %di # sched: [1:0.50]
-; BROADWELL-NEXT: adcw $7, (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: adcw %dx, %di # sched: [1:0.50]
-; BROADWELL-NEXT: adcw %di, (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: adcw (%rsi), %di # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_adc_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: adcw $511, %ax # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.50]
-; SKYLAKE-NEXT: adcw $511, %di # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.50]
-; SKYLAKE-NEXT: adcw $511, (%rsi) # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [8:1.00]
-; SKYLAKE-NEXT: adcw $7, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: adcw $7, (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: adcw %dx, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: adcw %di, (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: adcw (%rsi), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_adc_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: adcw $511, %ax # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.50]
-; SKX-NEXT: adcw $511, %di # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.50]
-; SKX-NEXT: adcw $511, (%rsi) # imm = 0x1FF
-; SKX-NEXT: # sched: [8:1.00]
-; SKX-NEXT: adcw $7, %di # sched: [1:0.50]
-; SKX-NEXT: adcw $7, (%rsi) # sched: [8:1.00]
-; SKX-NEXT: adcw %dx, %di # sched: [1:0.50]
-; SKX-NEXT: adcw %di, (%rsi) # sched: [8:1.00]
-; SKX-NEXT: adcw (%rsi), %di # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_adc_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: adcw $511, %ax # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: adcw $511, %di # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: adcw $511, (%rsi) # imm = 0x1FF
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: adcw $7, %di # sched: [1:1.00]
-; BDVER2-NEXT: adcw $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: adcw %dx, %di # sched: [1:1.00]
-; BDVER2-NEXT: adcw %di, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: adcw (%rsi), %di # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_adc_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: adcw $511, %ax # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: adcw $511, %di # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: adcw $511, (%rsi) # imm = 0x1FF
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: adcw $7, %di # sched: [1:1.00]
-; BTVER2-NEXT: adcw $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: adcw %dx, %di # sched: [1:1.00]
-; BTVER2-NEXT: adcw %di, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: adcw (%rsi), %di # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_adc_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: adcw $511, %ax # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: adcw $511, %di # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: adcw $511, (%rsi) # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: adcw $7, %di # sched: [1:0.25]
-; ZNVER1-NEXT: adcw $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: adcw %dx, %di # sched: [1:0.25]
-; ZNVER1-NEXT: adcw %di, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: adcw (%rsi), %di # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "adcw $3, %AX \0A\09 adcw $3, $0 \0A\09 adcw $3, $2 \0A\09 adcw $4, $0 \0A\09 adcw $4, $2 \0A\09 adcw $1, $0 \0A\09 adcw $0, $2 \0A\09 adcw $2, $0", "r,r,*m,i,i"(i16 %a0, i16 %a2, i16* %a1, i16 511, i8 7) nounwind
- ret void
-}
-define void @test_adc_32(i32 %a0, i32* %a1, i32 %a2) optsize {
-; GENERIC-LABEL: test_adc_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: adcl $665536, %eax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [2:0.67]
-; GENERIC-NEXT: adcl $665536, %edi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [2:0.67]
-; GENERIC-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [9:1.00]
-; GENERIC-NEXT: adcl $7, %edi # sched: [2:0.67]
-; GENERIC-NEXT: adcl $7, (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: adcl %edx, %edi # sched: [2:0.67]
-; GENERIC-NEXT: adcl %edi, (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: adcl (%rsi), %edi # sched: [7:0.67]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_adc_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: adcl $665536, %eax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: adcl $665536, %edi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: adcl $7, %edi # sched: [1:0.50]
-; ATOM-NEXT: adcl $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: adcl %edx, %edi # sched: [1:0.50]
-; ATOM-NEXT: adcl %edi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: adcl (%rsi), %edi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_adc_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: adcl $665536, %eax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: adcl $665536, %edi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: adcl $7, %edi # sched: [1:0.50]
-; SLM-NEXT: adcl $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: adcl %edx, %edi # sched: [1:0.50]
-; SLM-NEXT: adcl %edi, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: adcl (%rsi), %edi # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_adc_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: adcl $665536, %eax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [2:0.67]
-; SANDY-NEXT: adcl $665536, %edi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [2:0.67]
-; SANDY-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [9:1.00]
-; SANDY-NEXT: adcl $7, %edi # sched: [2:0.67]
-; SANDY-NEXT: adcl $7, (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: adcl %edx, %edi # sched: [2:0.67]
-; SANDY-NEXT: adcl %edi, (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: adcl (%rsi), %edi # sched: [7:0.67]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_adc_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: adcl $665536, %eax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [2:0.50]
-; HASWELL-NEXT: adcl $665536, %edi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [2:0.50]
-; HASWELL-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [9:1.00]
-; HASWELL-NEXT: adcl $7, %edi # sched: [2:0.50]
-; HASWELL-NEXT: adcl $7, (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: adcl %edx, %edi # sched: [2:0.50]
-; HASWELL-NEXT: adcl %edi, (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: adcl (%rsi), %edi # sched: [7:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_adc_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: adcl $665536, %eax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.50]
-; BROADWELL-NEXT: adcl $665536, %edi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.50]
-; BROADWELL-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [8:1.00]
-; BROADWELL-NEXT: adcl $7, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: adcl $7, (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: adcl %edx, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: adcl %edi, (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: adcl (%rsi), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_adc_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: adcl $665536, %eax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.50]
-; SKYLAKE-NEXT: adcl $665536, %edi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.50]
-; SKYLAKE-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [8:1.00]
-; SKYLAKE-NEXT: adcl $7, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: adcl $7, (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: adcl %edx, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: adcl %edi, (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: adcl (%rsi), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_adc_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: adcl $665536, %eax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.50]
-; SKX-NEXT: adcl $665536, %edi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.50]
-; SKX-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [8:1.00]
-; SKX-NEXT: adcl $7, %edi # sched: [1:0.50]
-; SKX-NEXT: adcl $7, (%rsi) # sched: [8:1.00]
-; SKX-NEXT: adcl %edx, %edi # sched: [1:0.50]
-; SKX-NEXT: adcl %edi, (%rsi) # sched: [8:1.00]
-; SKX-NEXT: adcl (%rsi), %edi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_adc_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: adcl $665536, %eax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: adcl $665536, %edi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: adcl $7, %edi # sched: [1:1.00]
-; BDVER2-NEXT: adcl $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: adcl %edx, %edi # sched: [1:1.00]
-; BDVER2-NEXT: adcl %edi, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: adcl (%rsi), %edi # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_adc_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: adcl $665536, %eax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: adcl $665536, %edi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: adcl $7, %edi # sched: [1:1.00]
-; BTVER2-NEXT: adcl $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: adcl %edx, %edi # sched: [1:1.00]
-; BTVER2-NEXT: adcl %edi, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: adcl (%rsi), %edi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_adc_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: adcl $665536, %eax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: adcl $665536, %edi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: adcl $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: adcl $7, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: adcl $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: adcl %edx, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: adcl %edi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: adcl (%rsi), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "adcl $3, %EAX \0A\09 adcl $3, $0 \0A\09 adcl $3, $2 \0A\09 adcl $4, $0 \0A\09 adcl $4, $2 \0A\09 adcl $1, $0 \0A\09 adcl $0, $2 \0A\09 adcl $2, $0", "r,r,*m,i,i"(i32 %a0, i32 %a2, i32* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-define void @test_adc_64(i64 %a0, i64* %a1, i64 %a2) optsize {
-; GENERIC-LABEL: test_adc_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: adcq $665536, %rax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [2:0.67]
-; GENERIC-NEXT: adcq $665536, %rdi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [2:0.67]
-; GENERIC-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [9:1.00]
-; GENERIC-NEXT: adcq $7, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: adcq $7, (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: adcq %rdx, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: adcq %rdi, (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: adcq (%rsi), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_adc_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: adcq $665536, %rax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: adcq $665536, %rdi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: adcq $7, %rdi # sched: [1:0.50]
-; ATOM-NEXT: adcq $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: adcq %rdx, %rdi # sched: [1:0.50]
-; ATOM-NEXT: adcq %rdi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: adcq (%rsi), %rdi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_adc_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: adcq $665536, %rax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: adcq $665536, %rdi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: adcq $7, %rdi # sched: [1:0.50]
-; SLM-NEXT: adcq $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: adcq %rdx, %rdi # sched: [1:0.50]
-; SLM-NEXT: adcq %rdi, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: adcq (%rsi), %rdi # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_adc_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: adcq $665536, %rax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [2:0.67]
-; SANDY-NEXT: adcq $665536, %rdi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [2:0.67]
-; SANDY-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [9:1.00]
-; SANDY-NEXT: adcq $7, %rdi # sched: [2:0.67]
-; SANDY-NEXT: adcq $7, (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: adcq %rdx, %rdi # sched: [2:0.67]
-; SANDY-NEXT: adcq %rdi, (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: adcq (%rsi), %rdi # sched: [7:0.67]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_adc_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: adcq $665536, %rax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [2:0.50]
-; HASWELL-NEXT: adcq $665536, %rdi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [2:0.50]
-; HASWELL-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [9:1.00]
-; HASWELL-NEXT: adcq $7, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: adcq $7, (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: adcq %rdx, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: adcq %rdi, (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: adcq (%rsi), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_adc_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: adcq $665536, %rax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.50]
-; BROADWELL-NEXT: adcq $665536, %rdi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.50]
-; BROADWELL-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [8:1.00]
-; BROADWELL-NEXT: adcq $7, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: adcq $7, (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: adcq %rdx, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: adcq %rdi, (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: adcq (%rsi), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_adc_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: adcq $665536, %rax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.50]
-; SKYLAKE-NEXT: adcq $665536, %rdi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.50]
-; SKYLAKE-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [8:1.00]
-; SKYLAKE-NEXT: adcq $7, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: adcq $7, (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: adcq %rdx, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: adcq %rdi, (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: adcq (%rsi), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_adc_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: adcq $665536, %rax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.50]
-; SKX-NEXT: adcq $665536, %rdi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.50]
-; SKX-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [8:1.00]
-; SKX-NEXT: adcq $7, %rdi # sched: [1:0.50]
-; SKX-NEXT: adcq $7, (%rsi) # sched: [8:1.00]
-; SKX-NEXT: adcq %rdx, %rdi # sched: [1:0.50]
-; SKX-NEXT: adcq %rdi, (%rsi) # sched: [8:1.00]
-; SKX-NEXT: adcq (%rsi), %rdi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_adc_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: adcq $665536, %rax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: adcq $665536, %rdi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: adcq $7, %rdi # sched: [1:1.00]
-; BDVER2-NEXT: adcq $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: adcq %rdx, %rdi # sched: [1:1.00]
-; BDVER2-NEXT: adcq %rdi, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: adcq (%rsi), %rdi # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_adc_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: adcq $665536, %rax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: adcq $665536, %rdi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: adcq $7, %rdi # sched: [1:1.00]
-; BTVER2-NEXT: adcq $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: adcq %rdx, %rdi # sched: [1:1.00]
-; BTVER2-NEXT: adcq %rdi, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: adcq (%rsi), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_adc_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: adcq $665536, %rax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: adcq $665536, %rdi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: adcq $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: adcq $7, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: adcq $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: adcq %rdx, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: adcq %rdi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: adcq (%rsi), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "adcq $3, %RAX \0A\09 adcq $3, $0 \0A\09 adcq $3, $2 \0A\09 adcq $4, $0 \0A\09 adcq $4, $2 \0A\09 adcq $1, $0 \0A\09 adcq $0, $2 \0A\09 adcq $2, $0", "r,r,*m,i,i"(i64 %a0, i64 %a2, i64* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-
-define void @test_add_8(i8 %a0, i8* %a1, i8 %a2) optsize {
-; GENERIC-LABEL: test_add_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: addb $7, %al # sched: [1:0.33]
-; GENERIC-NEXT: addb $7, %dil # sched: [1:0.33]
-; GENERIC-NEXT: addb $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: addb %dl, %dil # sched: [1:0.33]
-; GENERIC-NEXT: addb %dil, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: addb (%rsi), %dil # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_add_8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: addb $7, %al # sched: [1:0.50]
-; ATOM-NEXT: addb $7, %dil # sched: [1:0.50]
-; ATOM-NEXT: addb $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: addb %dl, %dil # sched: [1:0.50]
-; ATOM-NEXT: addb %dil, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: addb (%rsi), %dil # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_add_8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: addb $7, %al # sched: [1:0.50]
-; SLM-NEXT: addb $7, %dil # sched: [1:0.50]
-; SLM-NEXT: addb $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: addb %dl, %dil # sched: [1:0.50]
-; SLM-NEXT: addb %dil, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: addb (%rsi), %dil # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_add_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: addb $7, %al # sched: [1:0.33]
-; SANDY-NEXT: addb $7, %dil # sched: [1:0.33]
-; SANDY-NEXT: addb $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: addb %dl, %dil # sched: [1:0.33]
-; SANDY-NEXT: addb %dil, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: addb (%rsi), %dil # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_add_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: addb $7, %al # sched: [1:0.25]
-; HASWELL-NEXT: addb $7, %dil # sched: [1:0.25]
-; HASWELL-NEXT: addb $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: addb %dl, %dil # sched: [1:0.25]
-; HASWELL-NEXT: addb %dil, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: addb (%rsi), %dil # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_add_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: addb $7, %al # sched: [1:0.25]
-; BROADWELL-NEXT: addb $7, %dil # sched: [1:0.25]
-; BROADWELL-NEXT: addb $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: addb %dl, %dil # sched: [1:0.25]
-; BROADWELL-NEXT: addb %dil, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: addb (%rsi), %dil # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_add_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: addb $7, %al # sched: [1:0.25]
-; SKYLAKE-NEXT: addb $7, %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: addb $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: addb %dl, %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: addb %dil, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: addb (%rsi), %dil # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_add_8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: addb $7, %al # sched: [1:0.25]
-; SKX-NEXT: addb $7, %dil # sched: [1:0.25]
-; SKX-NEXT: addb $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: addb %dl, %dil # sched: [1:0.25]
-; SKX-NEXT: addb %dil, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: addb (%rsi), %dil # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_add_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: addb $7, %al # sched: [1:0.50]
-; BDVER2-NEXT: addb $7, %dil # sched: [1:0.50]
-; BDVER2-NEXT: addb $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: addb %dl, %dil # sched: [1:0.50]
-; BDVER2-NEXT: addb %dil, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: addb (%rsi), %dil # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_add_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: addb $7, %al # sched: [1:0.50]
-; BTVER2-NEXT: addb $7, %dil # sched: [1:0.50]
-; BTVER2-NEXT: addb $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: addb %dl, %dil # sched: [1:0.50]
-; BTVER2-NEXT: addb %dil, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: addb (%rsi), %dil # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_add_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: addb $7, %al # sched: [1:0.25]
-; ZNVER1-NEXT: addb $7, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: addb $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: addb %dl, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: addb %dil, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: addb (%rsi), %dil # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "addb $3, %AL \0A\09 addb $3, $0 \0A\09 addb $3, $2 \0A\09 addb $1, $0 \0A\09 addb $0, $2 \0A\09 addb $2, $0", "r,r,*m,i"(i8 %a0, i8 %a2, i8* %a1, i8 7) nounwind
- ret void
-}
-define void @test_add_16(i16 %a0, i16* %a1, i16 %a2) optsize {
-; GENERIC-LABEL: test_add_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: addw $511, %ax # imm = 0x1FF
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: addw $511, %di # imm = 0x1FF
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: addw $511, (%rsi) # imm = 0x1FF
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: addw $7, %di # sched: [1:0.33]
-; GENERIC-NEXT: addw $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: addw %dx, %di # sched: [1:0.33]
-; GENERIC-NEXT: addw %di, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: addw (%rsi), %di # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_add_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: addw $511, %ax # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: addw $511, %di # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: addw $511, (%rsi) # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: addw $7, %di # sched: [1:0.50]
-; ATOM-NEXT: addw $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: addw %dx, %di # sched: [1:0.50]
-; ATOM-NEXT: addw %di, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: addw (%rsi), %di # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_add_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: addw $511, %ax # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: addw $511, %di # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: addw $511, (%rsi) # imm = 0x1FF
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: addw $7, %di # sched: [1:0.50]
-; SLM-NEXT: addw $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: addw %dx, %di # sched: [1:0.50]
-; SLM-NEXT: addw %di, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: addw (%rsi), %di # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_add_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: addw $511, %ax # imm = 0x1FF
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: addw $511, %di # imm = 0x1FF
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: addw $511, (%rsi) # imm = 0x1FF
-; SANDY-NEXT: # sched: [7:1.00]
-; SANDY-NEXT: addw $7, %di # sched: [1:0.33]
-; SANDY-NEXT: addw $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: addw %dx, %di # sched: [1:0.33]
-; SANDY-NEXT: addw %di, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: addw (%rsi), %di # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_add_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: addw $511, %ax # imm = 0x1FF
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: addw $511, %di # imm = 0x1FF
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: addw $511, (%rsi) # imm = 0x1FF
-; HASWELL-NEXT: # sched: [7:1.00]
-; HASWELL-NEXT: addw $7, %di # sched: [1:0.25]
-; HASWELL-NEXT: addw $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: addw %dx, %di # sched: [1:0.25]
-; HASWELL-NEXT: addw %di, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: addw (%rsi), %di # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_add_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: addw $511, %ax # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: addw $511, %di # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: addw $511, (%rsi) # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [7:1.00]
-; BROADWELL-NEXT: addw $7, %di # sched: [1:0.25]
-; BROADWELL-NEXT: addw $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: addw %dx, %di # sched: [1:0.25]
-; BROADWELL-NEXT: addw %di, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: addw (%rsi), %di # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_add_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: addw $511, %ax # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: addw $511, %di # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: addw $511, (%rsi) # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [7:1.00]
-; SKYLAKE-NEXT: addw $7, %di # sched: [1:0.25]
-; SKYLAKE-NEXT: addw $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: addw %dx, %di # sched: [1:0.25]
-; SKYLAKE-NEXT: addw %di, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: addw (%rsi), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_add_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: addw $511, %ax # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: addw $511, %di # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: addw $511, (%rsi) # imm = 0x1FF
-; SKX-NEXT: # sched: [7:1.00]
-; SKX-NEXT: addw $7, %di # sched: [1:0.25]
-; SKX-NEXT: addw $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: addw %dx, %di # sched: [1:0.25]
-; SKX-NEXT: addw %di, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: addw (%rsi), %di # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_add_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: addw $511, %ax # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: addw $511, %di # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: addw $511, (%rsi) # imm = 0x1FF
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: addw $7, %di # sched: [1:0.50]
-; BDVER2-NEXT: addw $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: addw %dx, %di # sched: [1:0.50]
-; BDVER2-NEXT: addw %di, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: addw (%rsi), %di # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_add_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: addw $511, %ax # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: addw $511, %di # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: addw $511, (%rsi) # imm = 0x1FF
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: addw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: addw $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: addw %dx, %di # sched: [1:0.50]
-; BTVER2-NEXT: addw %di, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: addw (%rsi), %di # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_add_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: addw $511, %ax # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: addw $511, %di # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: addw $511, (%rsi) # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: addw $7, %di # sched: [1:0.25]
-; ZNVER1-NEXT: addw $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: addw %dx, %di # sched: [1:0.25]
-; ZNVER1-NEXT: addw %di, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: addw (%rsi), %di # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "addw $3, %AX \0A\09 addw $3, $0 \0A\09 addw $3, $2 \0A\09 addw $4, $0 \0A\09 addw $4, $2 \0A\09 addw $1, $0 \0A\09 addw $0, $2 \0A\09 addw $2, $0", "r,r,*m,i,i"(i16 %a0, i16 %a2, i16* %a1, i16 511, i8 7) nounwind
- ret void
-}
-define void @test_add_32(i32 %a0, i32* %a1, i32 %a2) optsize {
-; GENERIC-LABEL: test_add_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: addl $665536, %eax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: addl $665536, %edi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: addl $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: addl $7, %edi # sched: [1:0.33]
-; GENERIC-NEXT: addl $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: addl %edx, %edi # sched: [1:0.33]
-; GENERIC-NEXT: addl %edi, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: addl (%rsi), %edi # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_add_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: addl $665536, %eax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: addl $665536, %edi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: addl $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: addl $7, %edi # sched: [1:0.50]
-; ATOM-NEXT: addl $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: addl %edx, %edi # sched: [1:0.50]
-; ATOM-NEXT: addl %edi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: addl (%rsi), %edi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_add_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: addl $665536, %eax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: addl $665536, %edi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: addl $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: addl $7, %edi # sched: [1:0.50]
-; SLM-NEXT: addl $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: addl %edx, %edi # sched: [1:0.50]
-; SLM-NEXT: addl %edi, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: addl (%rsi), %edi # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_add_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: addl $665536, %eax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: addl $665536, %edi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: addl $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [7:1.00]
-; SANDY-NEXT: addl $7, %edi # sched: [1:0.33]
-; SANDY-NEXT: addl $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: addl %edx, %edi # sched: [1:0.33]
-; SANDY-NEXT: addl %edi, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: addl (%rsi), %edi # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_add_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: addl $665536, %eax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: addl $665536, %edi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: addl $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [7:1.00]
-; HASWELL-NEXT: addl $7, %edi # sched: [1:0.25]
-; HASWELL-NEXT: addl $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: addl %edx, %edi # sched: [1:0.25]
-; HASWELL-NEXT: addl %edi, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: addl (%rsi), %edi # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_add_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: addl $665536, %eax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: addl $665536, %edi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: addl $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [7:1.00]
-; BROADWELL-NEXT: addl $7, %edi # sched: [1:0.25]
-; BROADWELL-NEXT: addl $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: addl %edx, %edi # sched: [1:0.25]
-; BROADWELL-NEXT: addl %edi, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: addl (%rsi), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_add_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: addl $665536, %eax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: addl $665536, %edi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: addl $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [7:1.00]
-; SKYLAKE-NEXT: addl $7, %edi # sched: [1:0.25]
-; SKYLAKE-NEXT: addl $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: addl %edx, %edi # sched: [1:0.25]
-; SKYLAKE-NEXT: addl %edi, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: addl (%rsi), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_add_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: addl $665536, %eax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: addl $665536, %edi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: addl $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [7:1.00]
-; SKX-NEXT: addl $7, %edi # sched: [1:0.25]
-; SKX-NEXT: addl $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: addl %edx, %edi # sched: [1:0.25]
-; SKX-NEXT: addl %edi, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: addl (%rsi), %edi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_add_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: addl $665536, %eax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: addl $665536, %edi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: addl $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: addl $7, %edi # sched: [1:0.50]
-; BDVER2-NEXT: addl $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: addl %edx, %edi # sched: [1:0.50]
-; BDVER2-NEXT: addl %edi, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: addl (%rsi), %edi # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_add_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: addl $665536, %eax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: addl $665536, %edi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: addl $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: addl $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: addl $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: addl %edx, %edi # sched: [1:0.50]
-; BTVER2-NEXT: addl %edi, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: addl (%rsi), %edi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_add_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: addl $665536, %eax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: addl $665536, %edi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: addl $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: addl $7, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: addl $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: addl %edx, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: addl %edi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: addl (%rsi), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "addl $3, %EAX \0A\09 addl $3, $0 \0A\09 addl $3, $2 \0A\09 addl $4, $0 \0A\09 addl $4, $2 \0A\09 addl $1, $0 \0A\09 addl $0, $2 \0A\09 addl $2, $0", "r,r,*m,i,i"(i32 %a0, i32 %a2, i32* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-define void @test_add_64(i64 %a0, i64* %a1, i64 %a2) optsize {
-; GENERIC-LABEL: test_add_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: addq $665536, %rax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: addq $665536, %rdi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: addq $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: addq $7, %rdi # sched: [1:0.33]
-; GENERIC-NEXT: addq $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: addq %rdx, %rdi # sched: [1:0.33]
-; GENERIC-NEXT: addq %rdi, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: addq (%rsi), %rdi # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_add_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: addq $665536, %rax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: addq $665536, %rdi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: addq $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: addq $7, %rdi # sched: [1:0.50]
-; ATOM-NEXT: addq $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: addq %rdx, %rdi # sched: [1:0.50]
-; ATOM-NEXT: addq %rdi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: addq (%rsi), %rdi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_add_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: addq $665536, %rax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: addq $665536, %rdi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: addq $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: addq $7, %rdi # sched: [1:0.50]
-; SLM-NEXT: addq $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: addq %rdx, %rdi # sched: [1:0.50]
-; SLM-NEXT: addq %rdi, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: addq (%rsi), %rdi # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_add_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: addq $665536, %rax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: addq $665536, %rdi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: addq $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [7:1.00]
-; SANDY-NEXT: addq $7, %rdi # sched: [1:0.33]
-; SANDY-NEXT: addq $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: addq %rdx, %rdi # sched: [1:0.33]
-; SANDY-NEXT: addq %rdi, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: addq (%rsi), %rdi # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_add_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: addq $665536, %rax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: addq $665536, %rdi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: addq $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [7:1.00]
-; HASWELL-NEXT: addq $7, %rdi # sched: [1:0.25]
-; HASWELL-NEXT: addq $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: addq %rdx, %rdi # sched: [1:0.25]
-; HASWELL-NEXT: addq %rdi, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: addq (%rsi), %rdi # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_add_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: addq $665536, %rax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: addq $665536, %rdi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: addq $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [7:1.00]
-; BROADWELL-NEXT: addq $7, %rdi # sched: [1:0.25]
-; BROADWELL-NEXT: addq $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: addq %rdx, %rdi # sched: [1:0.25]
-; BROADWELL-NEXT: addq %rdi, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: addq (%rsi), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_add_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: addq $665536, %rax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: addq $665536, %rdi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: addq $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [7:1.00]
-; SKYLAKE-NEXT: addq $7, %rdi # sched: [1:0.25]
-; SKYLAKE-NEXT: addq $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: addq %rdx, %rdi # sched: [1:0.25]
-; SKYLAKE-NEXT: addq %rdi, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: addq (%rsi), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_add_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: addq $665536, %rax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: addq $665536, %rdi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: addq $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [7:1.00]
-; SKX-NEXT: addq $7, %rdi # sched: [1:0.25]
-; SKX-NEXT: addq $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: addq %rdx, %rdi # sched: [1:0.25]
-; SKX-NEXT: addq %rdi, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: addq (%rsi), %rdi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_add_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: addq $665536, %rax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: addq $665536, %rdi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: addq $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: addq $7, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: addq $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: addq %rdx, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: addq %rdi, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: addq (%rsi), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_add_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: addq $665536, %rax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: addq $665536, %rdi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: addq $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: addq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: addq $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: addq %rdx, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: addq %rdi, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: addq (%rsi), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_add_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: addq $665536, %rax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: addq $665536, %rdi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: addq $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: addq $7, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: addq $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: addq %rdx, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: addq %rdi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: addq (%rsi), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "addq $3, %RAX \0A\09 addq $3, $0 \0A\09 addq $3, $2 \0A\09 addq $4, $0 \0A\09 addq $4, $2 \0A\09 addq $1, $0 \0A\09 addq $0, $2 \0A\09 addq $2, $0", "r,r,*m,i,i"(i64 %a0, i64 %a2, i64* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-
-define void @test_and_8(i8 %a0, i8* %a1, i8 %a2) optsize {
-; GENERIC-LABEL: test_and_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: andb $7, %al # sched: [1:0.33]
-; GENERIC-NEXT: andb $7, %dil # sched: [1:0.33]
-; GENERIC-NEXT: andb $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: andb %dl, %dil # sched: [1:0.33]
-; GENERIC-NEXT: andb %dil, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: andb (%rsi), %dil # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_and_8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: andb $7, %al # sched: [1:0.50]
-; ATOM-NEXT: andb $7, %dil # sched: [1:0.50]
-; ATOM-NEXT: andb $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: andb %dl, %dil # sched: [1:0.50]
-; ATOM-NEXT: andb %dil, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: andb (%rsi), %dil # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_and_8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: andb $7, %al # sched: [1:0.50]
-; SLM-NEXT: andb $7, %dil # sched: [1:0.50]
-; SLM-NEXT: andb $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: andb %dl, %dil # sched: [1:0.50]
-; SLM-NEXT: andb %dil, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: andb (%rsi), %dil # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_and_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: andb $7, %al # sched: [1:0.33]
-; SANDY-NEXT: andb $7, %dil # sched: [1:0.33]
-; SANDY-NEXT: andb $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: andb %dl, %dil # sched: [1:0.33]
-; SANDY-NEXT: andb %dil, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: andb (%rsi), %dil # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_and_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: andb $7, %al # sched: [1:0.25]
-; HASWELL-NEXT: andb $7, %dil # sched: [1:0.25]
-; HASWELL-NEXT: andb $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: andb %dl, %dil # sched: [1:0.25]
-; HASWELL-NEXT: andb %dil, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: andb (%rsi), %dil # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_and_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: andb $7, %al # sched: [1:0.25]
-; BROADWELL-NEXT: andb $7, %dil # sched: [1:0.25]
-; BROADWELL-NEXT: andb $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: andb %dl, %dil # sched: [1:0.25]
-; BROADWELL-NEXT: andb %dil, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: andb (%rsi), %dil # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_and_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: andb $7, %al # sched: [1:0.25]
-; SKYLAKE-NEXT: andb $7, %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: andb $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: andb %dl, %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: andb %dil, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: andb (%rsi), %dil # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_and_8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: andb $7, %al # sched: [1:0.25]
-; SKX-NEXT: andb $7, %dil # sched: [1:0.25]
-; SKX-NEXT: andb $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: andb %dl, %dil # sched: [1:0.25]
-; SKX-NEXT: andb %dil, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: andb (%rsi), %dil # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_and_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: andb $7, %al # sched: [1:0.50]
-; BDVER2-NEXT: andb $7, %dil # sched: [1:0.50]
-; BDVER2-NEXT: andb $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: andb %dl, %dil # sched: [1:0.50]
-; BDVER2-NEXT: andb %dil, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: andb (%rsi), %dil # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_and_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: andb $7, %al # sched: [1:0.50]
-; BTVER2-NEXT: andb $7, %dil # sched: [1:0.50]
-; BTVER2-NEXT: andb $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: andb %dl, %dil # sched: [1:0.50]
-; BTVER2-NEXT: andb %dil, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: andb (%rsi), %dil # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_and_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: andb $7, %al # sched: [1:0.25]
-; ZNVER1-NEXT: andb $7, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: andb $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: andb %dl, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: andb %dil, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: andb (%rsi), %dil # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "andb $3, %AL \0A\09 andb $3, $0 \0A\09 andb $3, $2 \0A\09 andb $1, $0 \0A\09 andb $0, $2 \0A\09 andb $2, $0", "r,r,*m,i"(i8 %a0, i8 %a2, i8* %a1, i8 7) nounwind
- ret void
-}
-define void @test_and_16(i16 %a0, i16* %a1, i16 %a2) optsize {
-; GENERIC-LABEL: test_and_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: andw $511, %ax # imm = 0x1FF
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: andw $511, %di # imm = 0x1FF
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: andw $511, (%rsi) # imm = 0x1FF
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: andw $7, %di # sched: [1:0.33]
-; GENERIC-NEXT: andw $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: andw %dx, %di # sched: [1:0.33]
-; GENERIC-NEXT: andw %di, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: andw (%rsi), %di # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_and_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: andw $511, %ax # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: andw $511, %di # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: andw $511, (%rsi) # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: andw $7, %di # sched: [1:0.50]
-; ATOM-NEXT: andw $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: andw %dx, %di # sched: [1:0.50]
-; ATOM-NEXT: andw %di, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: andw (%rsi), %di # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_and_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: andw $511, %ax # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: andw $511, %di # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: andw $511, (%rsi) # imm = 0x1FF
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: andw $7, %di # sched: [1:0.50]
-; SLM-NEXT: andw $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: andw %dx, %di # sched: [1:0.50]
-; SLM-NEXT: andw %di, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: andw (%rsi), %di # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_and_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: andw $511, %ax # imm = 0x1FF
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: andw $511, %di # imm = 0x1FF
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: andw $511, (%rsi) # imm = 0x1FF
-; SANDY-NEXT: # sched: [7:1.00]
-; SANDY-NEXT: andw $7, %di # sched: [1:0.33]
-; SANDY-NEXT: andw $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: andw %dx, %di # sched: [1:0.33]
-; SANDY-NEXT: andw %di, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: andw (%rsi), %di # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_and_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: andw $511, %ax # imm = 0x1FF
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: andw $511, %di # imm = 0x1FF
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: andw $511, (%rsi) # imm = 0x1FF
-; HASWELL-NEXT: # sched: [7:1.00]
-; HASWELL-NEXT: andw $7, %di # sched: [1:0.25]
-; HASWELL-NEXT: andw $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: andw %dx, %di # sched: [1:0.25]
-; HASWELL-NEXT: andw %di, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: andw (%rsi), %di # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_and_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: andw $511, %ax # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: andw $511, %di # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: andw $511, (%rsi) # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [7:1.00]
-; BROADWELL-NEXT: andw $7, %di # sched: [1:0.25]
-; BROADWELL-NEXT: andw $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: andw %dx, %di # sched: [1:0.25]
-; BROADWELL-NEXT: andw %di, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: andw (%rsi), %di # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_and_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: andw $511, %ax # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: andw $511, %di # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: andw $511, (%rsi) # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [7:1.00]
-; SKYLAKE-NEXT: andw $7, %di # sched: [1:0.25]
-; SKYLAKE-NEXT: andw $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: andw %dx, %di # sched: [1:0.25]
-; SKYLAKE-NEXT: andw %di, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: andw (%rsi), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_and_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: andw $511, %ax # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: andw $511, %di # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: andw $511, (%rsi) # imm = 0x1FF
-; SKX-NEXT: # sched: [7:1.00]
-; SKX-NEXT: andw $7, %di # sched: [1:0.25]
-; SKX-NEXT: andw $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: andw %dx, %di # sched: [1:0.25]
-; SKX-NEXT: andw %di, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: andw (%rsi), %di # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_and_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: andw $511, %ax # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: andw $511, %di # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: andw $511, (%rsi) # imm = 0x1FF
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: andw $7, %di # sched: [1:0.50]
-; BDVER2-NEXT: andw $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: andw %dx, %di # sched: [1:0.50]
-; BDVER2-NEXT: andw %di, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: andw (%rsi), %di # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_and_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: andw $511, %ax # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: andw $511, %di # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: andw $511, (%rsi) # imm = 0x1FF
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: andw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: andw $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: andw %dx, %di # sched: [1:0.50]
-; BTVER2-NEXT: andw %di, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: andw (%rsi), %di # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_and_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: andw $511, %ax # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: andw $511, %di # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: andw $511, (%rsi) # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: andw $7, %di # sched: [1:0.25]
-; ZNVER1-NEXT: andw $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: andw %dx, %di # sched: [1:0.25]
-; ZNVER1-NEXT: andw %di, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: andw (%rsi), %di # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "andw $3, %AX \0A\09 andw $3, $0 \0A\09 andw $3, $2 \0A\09 andw $4, $0 \0A\09 andw $4, $2 \0A\09 andw $1, $0 \0A\09 andw $0, $2 \0A\09 andw $2, $0", "r,r,*m,i,i"(i16 %a0, i16 %a2, i16* %a1, i16 511, i8 7) nounwind
- ret void
-}
-define void @test_and_32(i32 %a0, i32* %a1, i32 %a2) optsize {
-; GENERIC-LABEL: test_and_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: andl $665536, %eax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: andl $665536, %edi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: andl $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: andl $7, %edi # sched: [1:0.33]
-; GENERIC-NEXT: andl $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: andl %edx, %edi # sched: [1:0.33]
-; GENERIC-NEXT: andl %edi, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: andl (%rsi), %edi # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_and_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: andl $665536, %eax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: andl $665536, %edi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: andl $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: andl $7, %edi # sched: [1:0.50]
-; ATOM-NEXT: andl $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: andl %edx, %edi # sched: [1:0.50]
-; ATOM-NEXT: andl %edi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: andl (%rsi), %edi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_and_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: andl $665536, %eax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: andl $665536, %edi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: andl $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: andl $7, %edi # sched: [1:0.50]
-; SLM-NEXT: andl $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: andl %edx, %edi # sched: [1:0.50]
-; SLM-NEXT: andl %edi, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: andl (%rsi), %edi # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_and_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: andl $665536, %eax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: andl $665536, %edi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: andl $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [7:1.00]
-; SANDY-NEXT: andl $7, %edi # sched: [1:0.33]
-; SANDY-NEXT: andl $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: andl %edx, %edi # sched: [1:0.33]
-; SANDY-NEXT: andl %edi, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: andl (%rsi), %edi # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_and_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: andl $665536, %eax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: andl $665536, %edi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: andl $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [7:1.00]
-; HASWELL-NEXT: andl $7, %edi # sched: [1:0.25]
-; HASWELL-NEXT: andl $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: andl %edx, %edi # sched: [1:0.25]
-; HASWELL-NEXT: andl %edi, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: andl (%rsi), %edi # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_and_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: andl $665536, %eax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: andl $665536, %edi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: andl $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [7:1.00]
-; BROADWELL-NEXT: andl $7, %edi # sched: [1:0.25]
-; BROADWELL-NEXT: andl $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: andl %edx, %edi # sched: [1:0.25]
-; BROADWELL-NEXT: andl %edi, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: andl (%rsi), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_and_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: andl $665536, %eax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: andl $665536, %edi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: andl $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [7:1.00]
-; SKYLAKE-NEXT: andl $7, %edi # sched: [1:0.25]
-; SKYLAKE-NEXT: andl $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: andl %edx, %edi # sched: [1:0.25]
-; SKYLAKE-NEXT: andl %edi, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: andl (%rsi), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_and_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: andl $665536, %eax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: andl $665536, %edi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: andl $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [7:1.00]
-; SKX-NEXT: andl $7, %edi # sched: [1:0.25]
-; SKX-NEXT: andl $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: andl %edx, %edi # sched: [1:0.25]
-; SKX-NEXT: andl %edi, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: andl (%rsi), %edi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_and_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: andl $665536, %eax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: andl $665536, %edi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: andl $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: andl $7, %edi # sched: [1:0.50]
-; BDVER2-NEXT: andl $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: andl %edx, %edi # sched: [1:0.50]
-; BDVER2-NEXT: andl %edi, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: andl (%rsi), %edi # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_and_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: andl $665536, %eax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: andl $665536, %edi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: andl $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: andl $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: andl $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: andl %edx, %edi # sched: [1:0.50]
-; BTVER2-NEXT: andl %edi, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: andl (%rsi), %edi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_and_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: andl $665536, %eax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: andl $665536, %edi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: andl $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: andl $7, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: andl $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: andl %edx, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: andl %edi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: andl (%rsi), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "andl $3, %EAX \0A\09 andl $3, $0 \0A\09 andl $3, $2 \0A\09 andl $4, $0 \0A\09 andl $4, $2 \0A\09 andl $1, $0 \0A\09 andl $0, $2 \0A\09 andl $2, $0", "r,r,*m,i,i"(i32 %a0, i32 %a2, i32* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-define void @test_and_64(i64 %a0, i64* %a1, i64 %a2) optsize {
-; GENERIC-LABEL: test_and_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: andq $665536, %rax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: andq $665536, %rdi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: andq $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: andq $7, %rdi # sched: [1:0.33]
-; GENERIC-NEXT: andq $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: andq %rdx, %rdi # sched: [1:0.33]
-; GENERIC-NEXT: andq %rdi, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: andq (%rsi), %rdi # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_and_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: andq $665536, %rax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: andq $665536, %rdi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: andq $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: andq $7, %rdi # sched: [1:0.50]
-; ATOM-NEXT: andq $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: andq %rdx, %rdi # sched: [1:0.50]
-; ATOM-NEXT: andq %rdi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: andq (%rsi), %rdi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_and_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: andq $665536, %rax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: andq $665536, %rdi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: andq $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: andq $7, %rdi # sched: [1:0.50]
-; SLM-NEXT: andq $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: andq %rdx, %rdi # sched: [1:0.50]
-; SLM-NEXT: andq %rdi, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: andq (%rsi), %rdi # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_and_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: andq $665536, %rax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: andq $665536, %rdi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: andq $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [7:1.00]
-; SANDY-NEXT: andq $7, %rdi # sched: [1:0.33]
-; SANDY-NEXT: andq $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: andq %rdx, %rdi # sched: [1:0.33]
-; SANDY-NEXT: andq %rdi, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: andq (%rsi), %rdi # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_and_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: andq $665536, %rax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: andq $665536, %rdi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: andq $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [7:1.00]
-; HASWELL-NEXT: andq $7, %rdi # sched: [1:0.25]
-; HASWELL-NEXT: andq $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: andq %rdx, %rdi # sched: [1:0.25]
-; HASWELL-NEXT: andq %rdi, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: andq (%rsi), %rdi # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_and_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: andq $665536, %rax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: andq $665536, %rdi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: andq $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [7:1.00]
-; BROADWELL-NEXT: andq $7, %rdi # sched: [1:0.25]
-; BROADWELL-NEXT: andq $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: andq %rdx, %rdi # sched: [1:0.25]
-; BROADWELL-NEXT: andq %rdi, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: andq (%rsi), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_and_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: andq $665536, %rax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: andq $665536, %rdi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: andq $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [7:1.00]
-; SKYLAKE-NEXT: andq $7, %rdi # sched: [1:0.25]
-; SKYLAKE-NEXT: andq $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: andq %rdx, %rdi # sched: [1:0.25]
-; SKYLAKE-NEXT: andq %rdi, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: andq (%rsi), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_and_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: andq $665536, %rax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: andq $665536, %rdi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: andq $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [7:1.00]
-; SKX-NEXT: andq $7, %rdi # sched: [1:0.25]
-; SKX-NEXT: andq $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: andq %rdx, %rdi # sched: [1:0.25]
-; SKX-NEXT: andq %rdi, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: andq (%rsi), %rdi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_and_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: andq $665536, %rax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: andq $665536, %rdi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: andq $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: andq $7, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: andq $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: andq %rdx, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: andq %rdi, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: andq (%rsi), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_and_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: andq $665536, %rax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: andq $665536, %rdi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: andq $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: andq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: andq $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: andq %rdx, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: andq %rdi, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: andq (%rsi), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_and_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: andq $665536, %rax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: andq $665536, %rdi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: andq $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: andq $7, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: andq $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: andq %rdx, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: andq %rdi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: andq (%rsi), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "andq $3, %RAX \0A\09 andq $3, $0 \0A\09 andq $3, $2 \0A\09 andq $4, $0 \0A\09 andq $4, $2 \0A\09 andq $1, $0 \0A\09 andq $0, $2 \0A\09 andq $2, $0", "r,r,*m,i,i"(i64 %a0, i64 %a2, i64* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-
-define i16 @test_bsf16(i16 %a0, i16* %a1) optsize {
-; GENERIC-LABEL: test_bsf16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: bsfw %di, %ax # sched: [3:1.00]
-; GENERIC-NEXT: bsfw (%rsi), %cx # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_bsf16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: bsfw %di, %ax # sched: [16:8.00]
-; ATOM-NEXT: bsfw (%rsi), %cx # sched: [16:8.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; ATOM-NEXT: # kill: def $ax killed $ax killed $eax
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_bsf16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: bsfw %di, %ax # sched: [10:10.00]
-; SLM-NEXT: bsfw (%rsi), %cx # sched: [13:10.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT: # kill: def $ax killed $ax killed $eax
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_bsf16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: bsfw %di, %ax # sched: [3:1.00]
-; SANDY-NEXT: bsfw (%rsi), %cx # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT: # kill: def $ax killed $ax killed $eax
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bsf16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: bsfw %di, %ax # sched: [3:1.00]
-; HASWELL-NEXT: bsfw (%rsi), %cx # sched: [8:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bsf16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: bsfw %di, %ax # sched: [3:1.00]
-; BROADWELL-NEXT: bsfw (%rsi), %cx # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bsf16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: bsfw %di, %ax # sched: [3:1.00]
-; SKYLAKE-NEXT: bsfw (%rsi), %cx # sched: [8:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_bsf16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: bsfw %di, %ax # sched: [3:1.00]
-; SKX-NEXT: bsfw (%rsi), %cx # sched: [8:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_bsf16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: bsfw %di, %ax # sched: [3:2.00]
-; BDVER2-NEXT: bsfw (%rsi), %cx # sched: [7:2.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_bsf16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: bsfw %di, %ax # sched: [4:4.00]
-; BTVER2-NEXT: bsfw (%rsi), %cx # sched: [7:4.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bsf16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: bsfw %di, %ax # sched: [3:0.25]
-; ZNVER1-NEXT: bsfw (%rsi), %cx # sched: [7:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: # kill: def $ax killed $ax killed $eax
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call { i16, i16 } asm sideeffect "bsf $2, $0 \0A\09 bsf $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i16 %a0, i16* %a1)
- %2 = extractvalue { i16, i16 } %1, 0
- %3 = extractvalue { i16, i16 } %1, 1
- %4 = or i16 %2, %3
- ret i16 %4
-}
-define i32 @test_bsf32(i32 %a0, i32* %a1) optsize {
-; GENERIC-LABEL: test_bsf32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: bsfl %edi, %eax # sched: [3:1.00]
-; GENERIC-NEXT: bsfl (%rsi), %ecx # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_bsf32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: bsfl %edi, %eax # sched: [16:8.00]
-; ATOM-NEXT: bsfl (%rsi), %ecx # sched: [16:8.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_bsf32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: bsfl %edi, %eax # sched: [10:10.00]
-; SLM-NEXT: bsfl (%rsi), %ecx # sched: [13:10.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_bsf32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: bsfl %edi, %eax # sched: [3:1.00]
-; SANDY-NEXT: bsfl (%rsi), %ecx # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bsf32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: bsfl %edi, %eax # sched: [3:1.00]
-; HASWELL-NEXT: bsfl (%rsi), %ecx # sched: [8:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bsf32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: bsfl %edi, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: bsfl (%rsi), %ecx # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bsf32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: bsfl %edi, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: bsfl (%rsi), %ecx # sched: [8:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_bsf32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: bsfl %edi, %eax # sched: [3:1.00]
-; SKX-NEXT: bsfl (%rsi), %ecx # sched: [8:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_bsf32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: bsfl %edi, %eax # sched: [3:2.00]
-; BDVER2-NEXT: bsfl (%rsi), %ecx # sched: [7:2.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_bsf32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: bsfl %edi, %eax # sched: [4:4.00]
-; BTVER2-NEXT: bsfl (%rsi), %ecx # sched: [7:4.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bsf32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: bsfl %edi, %eax # sched: [3:0.25]
-; ZNVER1-NEXT: bsfl (%rsi), %ecx # sched: [7:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call { i32, i32 } asm sideeffect "bsf $2, $0 \0A\09 bsf $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i32 %a0, i32* %a1)
- %2 = extractvalue { i32, i32 } %1, 0
- %3 = extractvalue { i32, i32 } %1, 1
- %4 = or i32 %2, %3
- ret i32 %4
-}
-define i64 @test_bsf64(i64 %a0, i64* %a1) optsize {
-; GENERIC-LABEL: test_bsf64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: bsfq %rdi, %rax # sched: [3:1.00]
-; GENERIC-NEXT: bsfq (%rsi), %rcx # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_bsf64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: bsfq %rdi, %rax # sched: [16:8.00]
-; ATOM-NEXT: bsfq (%rsi), %rcx # sched: [16:8.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_bsf64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: bsfq %rdi, %rax # sched: [10:10.00]
-; SLM-NEXT: bsfq (%rsi), %rcx # sched: [13:10.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_bsf64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: bsfq %rdi, %rax # sched: [3:1.00]
-; SANDY-NEXT: bsfq (%rsi), %rcx # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: orq %rcx, %rax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bsf64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: bsfq %rdi, %rax # sched: [3:1.00]
-; HASWELL-NEXT: bsfq (%rsi), %rcx # sched: [8:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bsf64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: bsfq %rdi, %rax # sched: [3:1.00]
-; BROADWELL-NEXT: bsfq (%rsi), %rcx # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bsf64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: bsfq %rdi, %rax # sched: [3:1.00]
-; SKYLAKE-NEXT: bsfq (%rsi), %rcx # sched: [8:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_bsf64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: bsfq %rdi, %rax # sched: [3:1.00]
-; SKX-NEXT: bsfq (%rsi), %rcx # sched: [8:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_bsf64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: bsfq %rdi, %rax # sched: [3:2.00]
-; BDVER2-NEXT: bsfq (%rsi), %rcx # sched: [7:2.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_bsf64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: bsfq %rdi, %rax # sched: [4:4.00]
-; BTVER2-NEXT: bsfq (%rsi), %rcx # sched: [7:4.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bsf64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: bsfq %rdi, %rax # sched: [3:0.25]
-; ZNVER1-NEXT: bsfq (%rsi), %rcx # sched: [7:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call { i64, i64 } asm sideeffect "bsf $2, $0 \0A\09 bsf $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i64 %a0, i64* %a1)
- %2 = extractvalue { i64, i64 } %1, 0
- %3 = extractvalue { i64, i64 } %1, 1
- %4 = or i64 %2, %3
- ret i64 %4
-}
-
-define i16 @test_bsr16(i16 %a0, i16* %a1) optsize {
-; GENERIC-LABEL: test_bsr16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: bsrw %di, %ax # sched: [3:1.00]
-; GENERIC-NEXT: bsrw (%rsi), %cx # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_bsr16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: bsrw %di, %ax # sched: [16:8.00]
-; ATOM-NEXT: bsrw (%rsi), %cx # sched: [16:8.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; ATOM-NEXT: # kill: def $ax killed $ax killed $eax
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_bsr16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: bsrw %di, %ax # sched: [10:10.00]
-; SLM-NEXT: bsrw (%rsi), %cx # sched: [13:10.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT: # kill: def $ax killed $ax killed $eax
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_bsr16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: bsrw %di, %ax # sched: [3:1.00]
-; SANDY-NEXT: bsrw (%rsi), %cx # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT: # kill: def $ax killed $ax killed $eax
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bsr16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: bsrw %di, %ax # sched: [3:1.00]
-; HASWELL-NEXT: bsrw (%rsi), %cx # sched: [8:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bsr16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: bsrw %di, %ax # sched: [3:1.00]
-; BROADWELL-NEXT: bsrw (%rsi), %cx # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bsr16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: bsrw %di, %ax # sched: [3:1.00]
-; SKYLAKE-NEXT: bsrw (%rsi), %cx # sched: [8:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_bsr16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: bsrw %di, %ax # sched: [3:1.00]
-; SKX-NEXT: bsrw (%rsi), %cx # sched: [8:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_bsr16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: bsrw %di, %ax # sched: [4:2.00]
-; BDVER2-NEXT: bsrw (%rsi), %cx # sched: [8:2.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_bsr16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: bsrw %di, %ax # sched: [5:4.00]
-; BTVER2-NEXT: bsrw (%rsi), %cx # sched: [8:4.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bsr16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: bsrw %di, %ax # sched: [3:0.25]
-; ZNVER1-NEXT: bsrw (%rsi), %cx # sched: [7:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: # kill: def $ax killed $ax killed $eax
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call { i16, i16 } asm sideeffect "bsr $2, $0 \0A\09 bsr $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i16 %a0, i16* %a1)
- %2 = extractvalue { i16, i16 } %1, 0
- %3 = extractvalue { i16, i16 } %1, 1
- %4 = or i16 %2, %3
- ret i16 %4
-}
-define i32 @test_bsr32(i32 %a0, i32* %a1) optsize {
-; GENERIC-LABEL: test_bsr32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: bsrl %edi, %eax # sched: [3:1.00]
-; GENERIC-NEXT: bsrl (%rsi), %ecx # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_bsr32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: bsrl %edi, %eax # sched: [16:8.00]
-; ATOM-NEXT: bsrl (%rsi), %ecx # sched: [16:8.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_bsr32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: bsrl %edi, %eax # sched: [10:10.00]
-; SLM-NEXT: bsrl (%rsi), %ecx # sched: [13:10.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_bsr32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: bsrl %edi, %eax # sched: [3:1.00]
-; SANDY-NEXT: bsrl (%rsi), %ecx # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: orl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bsr32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: bsrl %edi, %eax # sched: [3:1.00]
-; HASWELL-NEXT: bsrl (%rsi), %ecx # sched: [8:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bsr32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: bsrl %edi, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: bsrl (%rsi), %ecx # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bsr32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: bsrl %edi, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: bsrl (%rsi), %ecx # sched: [8:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_bsr32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: bsrl %edi, %eax # sched: [3:1.00]
-; SKX-NEXT: bsrl (%rsi), %ecx # sched: [8:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_bsr32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: bsrl %edi, %eax # sched: [4:2.00]
-; BDVER2-NEXT: bsrl (%rsi), %ecx # sched: [8:2.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_bsr32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: bsrl %edi, %eax # sched: [5:4.00]
-; BTVER2-NEXT: bsrl (%rsi), %ecx # sched: [8:4.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: orl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bsr32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: bsrl %edi, %eax # sched: [3:0.25]
-; ZNVER1-NEXT: bsrl (%rsi), %ecx # sched: [7:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: orl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call { i32, i32 } asm sideeffect "bsr $2, $0 \0A\09 bsr $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i32 %a0, i32* %a1)
- %2 = extractvalue { i32, i32 } %1, 0
- %3 = extractvalue { i32, i32 } %1, 1
- %4 = or i32 %2, %3
- ret i32 %4
-}
-define i64 @test_bsr64(i64 %a0, i64* %a1) optsize {
-; GENERIC-LABEL: test_bsr64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: bsrq %rdi, %rax # sched: [3:1.00]
-; GENERIC-NEXT: bsrq (%rsi), %rcx # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_bsr64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: bsrq %rdi, %rax # sched: [16:8.00]
-; ATOM-NEXT: bsrq (%rsi), %rcx # sched: [16:8.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_bsr64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: bsrq %rdi, %rax # sched: [10:10.00]
-; SLM-NEXT: bsrq (%rsi), %rcx # sched: [13:10.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_bsr64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: bsrq %rdi, %rax # sched: [3:1.00]
-; SANDY-NEXT: bsrq (%rsi), %rcx # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: orq %rcx, %rax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bsr64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: bsrq %rdi, %rax # sched: [3:1.00]
-; HASWELL-NEXT: bsrq (%rsi), %rcx # sched: [8:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bsr64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: bsrq %rdi, %rax # sched: [3:1.00]
-; BROADWELL-NEXT: bsrq (%rsi), %rcx # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bsr64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: bsrq %rdi, %rax # sched: [3:1.00]
-; SKYLAKE-NEXT: bsrq (%rsi), %rcx # sched: [8:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_bsr64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: bsrq %rdi, %rax # sched: [3:1.00]
-; SKX-NEXT: bsrq (%rsi), %rcx # sched: [8:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_bsr64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: bsrq %rdi, %rax # sched: [4:2.00]
-; BDVER2-NEXT: bsrq (%rsi), %rcx # sched: [8:2.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_bsr64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: bsrq %rdi, %rax # sched: [5:4.00]
-; BTVER2-NEXT: bsrq (%rsi), %rcx # sched: [8:4.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bsr64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: bsrq %rdi, %rax # sched: [3:0.25]
-; ZNVER1-NEXT: bsrq (%rsi), %rcx # sched: [7:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call { i64, i64 } asm sideeffect "bsr $2, $0 \0A\09 bsr $3, $1", "=r,=r,r,*m,~{dirflag},~{fpsr},~{flags}"(i64 %a0, i64* %a1)
- %2 = extractvalue { i64, i64 } %1, 0
- %3 = extractvalue { i64, i64 } %1, 1
- %4 = or i64 %2, %3
- ret i64 %4
-}
-
-define i32 @test_bswap32(i32 %a0) optsize {
-; GENERIC-LABEL: test_bswap32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: bswapl %eax # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_bswap32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl %edi, %eax # sched: [1:0.50]
-; ATOM-NEXT: bswapl %eax # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_bswap32:
-; SLM: # %bb.0:
-; SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
-; SLM-NEXT: bswapl %eax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_bswap32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
-; SANDY-NEXT: bswapl %eax # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bswap32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
-; HASWELL-NEXT: bswapl %eax # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bswap32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: bswapl %eax # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bswap32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: bswapl %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_bswap32:
-; SKX: # %bb.0:
-; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKX-NEXT: bswapl %eax # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_bswap32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.50]
-; BDVER2-NEXT: bswapl %eax # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_bswap32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50]
-; BTVER2-NEXT: bswapl %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bswap32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: bswapl %eax # sched: [1:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = tail call i32 asm "bswap $0", "=r,0"(i32 %a0) nounwind
- ret i32 %1
-}
-define i64 @test_bswap64(i64 %a0) optsize {
-; GENERIC-LABEL: test_bswap64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: bswapq %rax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_bswap64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; ATOM-NEXT: bswapq %rax # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_bswap64:
-; SLM: # %bb.0:
-; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; SLM-NEXT: bswapq %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_bswap64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; SANDY-NEXT: bswapq %rax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bswap64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; HASWELL-NEXT: bswapq %rax # sched: [2:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bswap64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: bswapq %rax # sched: [2:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bswap64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: bswapq %rax # sched: [2:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_bswap64:
-; SKX: # %bb.0:
-; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; SKX-NEXT: bswapq %rax # sched: [2:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_bswap64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BDVER2-NEXT: bswapq %rax # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_bswap64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: bswapq %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bswap64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: bswapq %rax # sched: [1:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = tail call i64 asm "bswap $0", "=r,0"(i64 %a0) nounwind
- ret i64 %1
-}
-
-define void @test_bt_btc_btr_bts_16(i16 %a0, i16 %a1, i16 *%a2) optsize {
-; GENERIC-LABEL: test_bt_btc_btr_bts_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: btw %si, %di # sched: [1:0.50]
-; GENERIC-NEXT: btcw %si, %di # sched: [1:0.50]
-; GENERIC-NEXT: btrw %si, %di # sched: [1:0.50]
-; GENERIC-NEXT: btsw %si, %di # sched: [1:0.50]
-; GENERIC-NEXT: btw %si, (%rdx) # sched: [9:1.00]
-; GENERIC-NEXT: btcw %si, (%rdx) # sched: [9:1.00]
-; GENERIC-NEXT: btrw %si, (%rdx) # sched: [9:1.00]
-; GENERIC-NEXT: btsw %si, (%rdx) # sched: [9:1.00]
-; GENERIC-NEXT: btw $7, %di # sched: [1:0.50]
-; GENERIC-NEXT: btcw $7, %di # sched: [1:0.50]
-; GENERIC-NEXT: btrw $7, %di # sched: [1:0.50]
-; GENERIC-NEXT: btsw $7, %di # sched: [1:0.50]
-; GENERIC-NEXT: btw $7, (%rdx) # sched: [6:0.50]
-; GENERIC-NEXT: btcw $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: btrw $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: btsw $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_bt_btc_btr_bts_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: btw %si, %di # sched: [1:1.00]
-; ATOM-NEXT: btcw %si, %di # sched: [1:1.00]
-; ATOM-NEXT: btrw %si, %di # sched: [1:1.00]
-; ATOM-NEXT: btsw %si, %di # sched: [1:1.00]
-; ATOM-NEXT: btw %si, (%rdx) # sched: [9:4.50]
-; ATOM-NEXT: btcw %si, (%rdx) # sched: [11:5.50]
-; ATOM-NEXT: btrw %si, (%rdx) # sched: [11:5.50]
-; ATOM-NEXT: btsw %si, (%rdx) # sched: [11:5.50]
-; ATOM-NEXT: btw $7, %di # sched: [1:1.00]
-; ATOM-NEXT: btcw $7, %di # sched: [1:1.00]
-; ATOM-NEXT: btrw $7, %di # sched: [1:1.00]
-; ATOM-NEXT: btsw $7, %di # sched: [1:1.00]
-; ATOM-NEXT: btw $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: btcw $7, (%rdx) # sched: [2:1.00]
-; ATOM-NEXT: btrw $7, (%rdx) # sched: [2:1.00]
-; ATOM-NEXT: btsw $7, (%rdx) # sched: [2:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_bt_btc_btr_bts_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: btw %si, %di # sched: [1:0.50]
-; SLM-NEXT: btcw %si, %di # sched: [1:0.50]
-; SLM-NEXT: btrw %si, %di # sched: [1:0.50]
-; SLM-NEXT: btsw %si, %di # sched: [1:0.50]
-; SLM-NEXT: btw %si, (%rdx) # sched: [4:1.00]
-; SLM-NEXT: btcw %si, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: btrw %si, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: btsw %si, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: btw $7, %di # sched: [1:0.50]
-; SLM-NEXT: btcw $7, %di # sched: [1:0.50]
-; SLM-NEXT: btrw $7, %di # sched: [1:0.50]
-; SLM-NEXT: btsw $7, %di # sched: [1:0.50]
-; SLM-NEXT: btw $7, (%rdx) # sched: [4:1.00]
-; SLM-NEXT: btcw $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: btrw $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: btsw $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_bt_btc_btr_bts_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: btw %si, %di # sched: [1:0.50]
-; SANDY-NEXT: btcw %si, %di # sched: [1:0.50]
-; SANDY-NEXT: btrw %si, %di # sched: [1:0.50]
-; SANDY-NEXT: btsw %si, %di # sched: [1:0.50]
-; SANDY-NEXT: btw %si, (%rdx) # sched: [9:1.00]
-; SANDY-NEXT: btcw %si, (%rdx) # sched: [9:1.00]
-; SANDY-NEXT: btrw %si, (%rdx) # sched: [9:1.00]
-; SANDY-NEXT: btsw %si, (%rdx) # sched: [9:1.00]
-; SANDY-NEXT: btw $7, %di # sched: [1:0.50]
-; SANDY-NEXT: btcw $7, %di # sched: [1:0.50]
-; SANDY-NEXT: btrw $7, %di # sched: [1:0.50]
-; SANDY-NEXT: btsw $7, %di # sched: [1:0.50]
-; SANDY-NEXT: btw $7, (%rdx) # sched: [6:0.50]
-; SANDY-NEXT: btcw $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: btrw $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: btsw $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bt_btc_btr_bts_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: btw %si, %di # sched: [1:0.50]
-; HASWELL-NEXT: btcw %si, %di # sched: [1:0.50]
-; HASWELL-NEXT: btrw %si, %di # sched: [1:0.50]
-; HASWELL-NEXT: btsw %si, %di # sched: [1:0.50]
-; HASWELL-NEXT: btw %si, (%rdx) # sched: [1:2.50]
-; HASWELL-NEXT: btcw %si, (%rdx) # sched: [1:2.75]
-; HASWELL-NEXT: btrw %si, (%rdx) # sched: [1:2.75]
-; HASWELL-NEXT: btsw %si, (%rdx) # sched: [1:2.75]
-; HASWELL-NEXT: btw $7, %di # sched: [1:0.50]
-; HASWELL-NEXT: btcw $7, %di # sched: [1:0.50]
-; HASWELL-NEXT: btrw $7, %di # sched: [1:0.50]
-; HASWELL-NEXT: btsw $7, %di # sched: [1:0.50]
-; HASWELL-NEXT: btw $7, (%rdx) # sched: [6:0.50]
-; HASWELL-NEXT: btcw $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: btrw $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: btsw $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bt_btc_btr_bts_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: btw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: btcw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: btrw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: btsw %si, %di # sched: [1:0.50]
-; BROADWELL-NEXT: btw %si, (%rdx) # sched: [6:0.50]
-; BROADWELL-NEXT: btcw %si, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: btrw %si, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: btsw %si, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: btw $7, %di # sched: [1:0.50]
-; BROADWELL-NEXT: btcw $7, %di # sched: [1:0.50]
-; BROADWELL-NEXT: btrw $7, %di # sched: [1:0.50]
-; BROADWELL-NEXT: btsw $7, %di # sched: [1:0.50]
-; BROADWELL-NEXT: btw $7, (%rdx) # sched: [6:0.50]
-; BROADWELL-NEXT: btcw $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: btrw $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: btsw $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bt_btc_btr_bts_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: btw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: btcw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: btrw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: btsw %si, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: btw %si, (%rdx) # sched: [6:0.50]
-; SKYLAKE-NEXT: btcw %si, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: btrw %si, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: btsw %si, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: btw $7, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: btcw $7, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: btrw $7, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: btsw $7, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: btw $7, (%rdx) # sched: [6:0.50]
-; SKYLAKE-NEXT: btcw $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: btrw $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: btsw $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_bt_btc_btr_bts_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: btw %si, %di # sched: [1:0.50]
-; SKX-NEXT: btcw %si, %di # sched: [1:0.50]
-; SKX-NEXT: btrw %si, %di # sched: [1:0.50]
-; SKX-NEXT: btsw %si, %di # sched: [1:0.50]
-; SKX-NEXT: btw %si, (%rdx) # sched: [6:0.50]
-; SKX-NEXT: btcw %si, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: btrw %si, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: btsw %si, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: btw $7, %di # sched: [1:0.50]
-; SKX-NEXT: btcw $7, %di # sched: [1:0.50]
-; SKX-NEXT: btrw $7, %di # sched: [1:0.50]
-; SKX-NEXT: btsw $7, %di # sched: [1:0.50]
-; SKX-NEXT: btw $7, (%rdx) # sched: [6:0.50]
-; SKX-NEXT: btcw $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: btrw $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: btsw $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_bt_btc_btr_bts_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: btw %si, %di # sched: [1:0.50]
-; BDVER2-NEXT: btcw %si, %di # sched: [2:0.50]
-; BDVER2-NEXT: btrw %si, %di # sched: [2:0.50]
-; BDVER2-NEXT: btsw %si, %di # sched: [2:0.50]
-; BDVER2-NEXT: btw %si, (%rdx) # sched: [5:0.50]
-; BDVER2-NEXT: btcw %si, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: btrw %si, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: btsw %si, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: btw $7, %di # sched: [1:0.50]
-; BDVER2-NEXT: btcw $7, %di # sched: [2:0.50]
-; BDVER2-NEXT: btrw $7, %di # sched: [2:0.50]
-; BDVER2-NEXT: btsw $7, %di # sched: [2:0.50]
-; BDVER2-NEXT: btw $7, (%rdx) # sched: [5:0.50]
-; BDVER2-NEXT: btcw $7, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: btrw $7, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: btsw $7, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_bt_btc_btr_bts_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: btw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: btcw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: btrw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: btsw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: btw %si, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: btcw %si, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: btrw %si, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: btsw %si, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: btw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: btcw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: btrw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: btsw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: btw $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: btcw $7, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: btrw $7, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: btsw $7, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bt_btc_btr_bts_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: btw %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: btcw %si, %di # sched: [2:0.25]
-; ZNVER1-NEXT: btrw %si, %di # sched: [2:0.25]
-; ZNVER1-NEXT: btsw %si, %di # sched: [2:0.25]
-; ZNVER1-NEXT: btw %si, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: btcw %si, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: btrw %si, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: btsw %si, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: btw $7, %di # sched: [1:0.25]
-; ZNVER1-NEXT: btcw $7, %di # sched: [2:0.25]
-; ZNVER1-NEXT: btrw $7, %di # sched: [2:0.25]
-; ZNVER1-NEXT: btsw $7, %di # sched: [2:0.25]
-; ZNVER1-NEXT: btw $7, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: btcw $7, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: btrw $7, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: btsw $7, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "btw $1, $0 \0A\09 btcw $1, $0 \0A\09 btrw $1, $0 \0A\09 btsw $1, $0 \0A\09 btw $1, $2 \0A\09 btcw $1, $2 \0A\09 btrw $1, $2 \0A\09 btsw $1, $2 \0A\09 btw $3, $0 \0A\09 btcw $3, $0 \0A\09 btrw $3, $0 \0A\09 btsw $3, $0 \0A\09 btw $3, $2 \0A\09 btcw $3, $2 \0A\09 btrw $3, $2 \0A\09 btsw $3, $2", "r,r,*m,i"(i16 %a0, i16 %a1, i16 *%a2, i8 7)
- ret void
-}
-define void @test_bt_btc_btr_bts_32(i32 %a0, i32 %a1, i32 *%a2) optsize {
-; GENERIC-LABEL: test_bt_btc_btr_bts_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: btl %esi, %edi # sched: [1:0.50]
-; GENERIC-NEXT: btcl %esi, %edi # sched: [1:0.50]
-; GENERIC-NEXT: btrl %esi, %edi # sched: [1:0.50]
-; GENERIC-NEXT: btsl %esi, %edi # sched: [1:0.50]
-; GENERIC-NEXT: btl %esi, (%rdx) # sched: [9:1.00]
-; GENERIC-NEXT: btcl %esi, (%rdx) # sched: [9:1.00]
-; GENERIC-NEXT: btrl %esi, (%rdx) # sched: [9:1.00]
-; GENERIC-NEXT: btsl %esi, (%rdx) # sched: [9:1.00]
-; GENERIC-NEXT: btl $7, %edi # sched: [1:0.50]
-; GENERIC-NEXT: btcl $7, %edi # sched: [1:0.50]
-; GENERIC-NEXT: btrl $7, %edi # sched: [1:0.50]
-; GENERIC-NEXT: btsl $7, %edi # sched: [1:0.50]
-; GENERIC-NEXT: btl $7, (%rdx) # sched: [6:0.50]
-; GENERIC-NEXT: btcl $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: btrl $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: btsl $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_bt_btc_btr_bts_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: btl %esi, %edi # sched: [1:1.00]
-; ATOM-NEXT: btcl %esi, %edi # sched: [1:1.00]
-; ATOM-NEXT: btrl %esi, %edi # sched: [1:1.00]
-; ATOM-NEXT: btsl %esi, %edi # sched: [1:1.00]
-; ATOM-NEXT: btl %esi, (%rdx) # sched: [9:4.50]
-; ATOM-NEXT: btcl %esi, (%rdx) # sched: [11:5.50]
-; ATOM-NEXT: btrl %esi, (%rdx) # sched: [11:5.50]
-; ATOM-NEXT: btsl %esi, (%rdx) # sched: [11:5.50]
-; ATOM-NEXT: btl $7, %edi # sched: [1:1.00]
-; ATOM-NEXT: btcl $7, %edi # sched: [1:1.00]
-; ATOM-NEXT: btrl $7, %edi # sched: [1:1.00]
-; ATOM-NEXT: btsl $7, %edi # sched: [1:1.00]
-; ATOM-NEXT: btl $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: btcl $7, (%rdx) # sched: [2:1.00]
-; ATOM-NEXT: btrl $7, (%rdx) # sched: [2:1.00]
-; ATOM-NEXT: btsl $7, (%rdx) # sched: [2:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_bt_btc_btr_bts_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: btl %esi, %edi # sched: [1:0.50]
-; SLM-NEXT: btcl %esi, %edi # sched: [1:0.50]
-; SLM-NEXT: btrl %esi, %edi # sched: [1:0.50]
-; SLM-NEXT: btsl %esi, %edi # sched: [1:0.50]
-; SLM-NEXT: btl %esi, (%rdx) # sched: [4:1.00]
-; SLM-NEXT: btcl %esi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: btrl %esi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: btsl %esi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: btl $7, %edi # sched: [1:0.50]
-; SLM-NEXT: btcl $7, %edi # sched: [1:0.50]
-; SLM-NEXT: btrl $7, %edi # sched: [1:0.50]
-; SLM-NEXT: btsl $7, %edi # sched: [1:0.50]
-; SLM-NEXT: btl $7, (%rdx) # sched: [4:1.00]
-; SLM-NEXT: btcl $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: btrl $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: btsl $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_bt_btc_btr_bts_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: btl %esi, %edi # sched: [1:0.50]
-; SANDY-NEXT: btcl %esi, %edi # sched: [1:0.50]
-; SANDY-NEXT: btrl %esi, %edi # sched: [1:0.50]
-; SANDY-NEXT: btsl %esi, %edi # sched: [1:0.50]
-; SANDY-NEXT: btl %esi, (%rdx) # sched: [9:1.00]
-; SANDY-NEXT: btcl %esi, (%rdx) # sched: [9:1.00]
-; SANDY-NEXT: btrl %esi, (%rdx) # sched: [9:1.00]
-; SANDY-NEXT: btsl %esi, (%rdx) # sched: [9:1.00]
-; SANDY-NEXT: btl $7, %edi # sched: [1:0.50]
-; SANDY-NEXT: btcl $7, %edi # sched: [1:0.50]
-; SANDY-NEXT: btrl $7, %edi # sched: [1:0.50]
-; SANDY-NEXT: btsl $7, %edi # sched: [1:0.50]
-; SANDY-NEXT: btl $7, (%rdx) # sched: [6:0.50]
-; SANDY-NEXT: btcl $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: btrl $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: btsl $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bt_btc_btr_bts_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: btl %esi, %edi # sched: [1:0.50]
-; HASWELL-NEXT: btcl %esi, %edi # sched: [1:0.50]
-; HASWELL-NEXT: btrl %esi, %edi # sched: [1:0.50]
-; HASWELL-NEXT: btsl %esi, %edi # sched: [1:0.50]
-; HASWELL-NEXT: btl %esi, (%rdx) # sched: [1:2.50]
-; HASWELL-NEXT: btcl %esi, (%rdx) # sched: [1:2.75]
-; HASWELL-NEXT: btrl %esi, (%rdx) # sched: [1:2.75]
-; HASWELL-NEXT: btsl %esi, (%rdx) # sched: [1:2.75]
-; HASWELL-NEXT: btl $7, %edi # sched: [1:0.50]
-; HASWELL-NEXT: btcl $7, %edi # sched: [1:0.50]
-; HASWELL-NEXT: btrl $7, %edi # sched: [1:0.50]
-; HASWELL-NEXT: btsl $7, %edi # sched: [1:0.50]
-; HASWELL-NEXT: btl $7, (%rdx) # sched: [6:0.50]
-; HASWELL-NEXT: btcl $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: btrl $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: btsl $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bt_btc_btr_bts_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: btl %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: btcl %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: btrl %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: btsl %esi, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: btl %esi, (%rdx) # sched: [6:0.50]
-; BROADWELL-NEXT: btcl %esi, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: btrl %esi, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: btsl %esi, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: btl $7, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: btcl $7, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: btrl $7, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: btsl $7, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: btl $7, (%rdx) # sched: [6:0.50]
-; BROADWELL-NEXT: btcl $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: btrl $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: btsl $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bt_btc_btr_bts_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: btl %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: btcl %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: btrl %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: btsl %esi, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: btl %esi, (%rdx) # sched: [6:0.50]
-; SKYLAKE-NEXT: btcl %esi, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: btrl %esi, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: btsl %esi, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: btl $7, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: btcl $7, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: btrl $7, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: btsl $7, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: btl $7, (%rdx) # sched: [6:0.50]
-; SKYLAKE-NEXT: btcl $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: btrl $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: btsl $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_bt_btc_btr_bts_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: btl %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: btcl %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: btrl %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: btsl %esi, %edi # sched: [1:0.50]
-; SKX-NEXT: btl %esi, (%rdx) # sched: [6:0.50]
-; SKX-NEXT: btcl %esi, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: btrl %esi, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: btsl %esi, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: btl $7, %edi # sched: [1:0.50]
-; SKX-NEXT: btcl $7, %edi # sched: [1:0.50]
-; SKX-NEXT: btrl $7, %edi # sched: [1:0.50]
-; SKX-NEXT: btsl $7, %edi # sched: [1:0.50]
-; SKX-NEXT: btl $7, (%rdx) # sched: [6:0.50]
-; SKX-NEXT: btcl $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: btrl $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: btsl $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_bt_btc_btr_bts_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: btl %esi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: btcl %esi, %edi # sched: [2:0.50]
-; BDVER2-NEXT: btrl %esi, %edi # sched: [2:0.50]
-; BDVER2-NEXT: btsl %esi, %edi # sched: [2:0.50]
-; BDVER2-NEXT: btl %esi, (%rdx) # sched: [5:0.50]
-; BDVER2-NEXT: btcl %esi, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: btrl %esi, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: btsl %esi, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: btl $7, %edi # sched: [1:0.50]
-; BDVER2-NEXT: btcl $7, %edi # sched: [2:0.50]
-; BDVER2-NEXT: btrl $7, %edi # sched: [2:0.50]
-; BDVER2-NEXT: btsl $7, %edi # sched: [2:0.50]
-; BDVER2-NEXT: btl $7, (%rdx) # sched: [5:0.50]
-; BDVER2-NEXT: btcl $7, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: btrl $7, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: btsl $7, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_bt_btc_btr_bts_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: btl %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: btcl %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: btrl %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: btsl %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: btl %esi, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: btcl %esi, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: btrl %esi, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: btsl %esi, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: btl $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: btcl $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: btrl $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: btsl $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: btl $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: btcl $7, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: btrl $7, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: btsl $7, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bt_btc_btr_bts_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: btl %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: btcl %esi, %edi # sched: [2:0.25]
-; ZNVER1-NEXT: btrl %esi, %edi # sched: [2:0.25]
-; ZNVER1-NEXT: btsl %esi, %edi # sched: [2:0.25]
-; ZNVER1-NEXT: btl %esi, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: btcl %esi, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: btrl %esi, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: btsl %esi, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: btl $7, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: btcl $7, %edi # sched: [2:0.25]
-; ZNVER1-NEXT: btrl $7, %edi # sched: [2:0.25]
-; ZNVER1-NEXT: btsl $7, %edi # sched: [2:0.25]
-; ZNVER1-NEXT: btl $7, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: btcl $7, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: btrl $7, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: btsl $7, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "btl $1, $0 \0A\09 btcl $1, $0 \0A\09 btrl $1, $0 \0A\09 btsl $1, $0 \0A\09 btl $1, $2 \0A\09 btcl $1, $2 \0A\09 btrl $1, $2 \0A\09 btsl $1, $2 \0A\09 btl $3, $0 \0A\09 btcl $3, $0 \0A\09 btrl $3, $0 \0A\09 btsl $3, $0 \0A\09 btl $3, $2 \0A\09 btcl $3, $2 \0A\09 btrl $3, $2 \0A\09 btsl $3, $2", "r,r,*m,i"(i32 %a0, i32 %a1, i32 *%a2, i8 7)
- ret void
-}
-define void @test_bt_btc_btr_bts_64(i64 %a0, i64 %a1, i64 *%a2) optsize {
-; GENERIC-LABEL: test_bt_btc_btr_bts_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: btq %rsi, %rdi # sched: [1:0.50]
-; GENERIC-NEXT: btcq %rsi, %rdi # sched: [1:0.50]
-; GENERIC-NEXT: btrq %rsi, %rdi # sched: [1:0.50]
-; GENERIC-NEXT: btsq %rsi, %rdi # sched: [1:0.50]
-; GENERIC-NEXT: btq %rsi, (%rdx) # sched: [9:1.00]
-; GENERIC-NEXT: btcq %rsi, (%rdx) # sched: [9:1.00]
-; GENERIC-NEXT: btrq %rsi, (%rdx) # sched: [9:1.00]
-; GENERIC-NEXT: btsq %rsi, (%rdx) # sched: [9:1.00]
-; GENERIC-NEXT: btq $7, %rdi # sched: [1:0.50]
-; GENERIC-NEXT: btcq $7, %rdi # sched: [1:0.50]
-; GENERIC-NEXT: btrq $7, %rdi # sched: [1:0.50]
-; GENERIC-NEXT: btsq $7, %rdi # sched: [1:0.50]
-; GENERIC-NEXT: btq $7, (%rdx) # sched: [6:0.50]
-; GENERIC-NEXT: btcq $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: btrq $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: btsq $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_bt_btc_btr_bts_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: btq %rsi, %rdi # sched: [1:1.00]
-; ATOM-NEXT: btcq %rsi, %rdi # sched: [1:1.00]
-; ATOM-NEXT: btrq %rsi, %rdi # sched: [1:1.00]
-; ATOM-NEXT: btsq %rsi, %rdi # sched: [1:1.00]
-; ATOM-NEXT: btq %rsi, (%rdx) # sched: [9:4.50]
-; ATOM-NEXT: btcq %rsi, (%rdx) # sched: [11:5.50]
-; ATOM-NEXT: btrq %rsi, (%rdx) # sched: [11:5.50]
-; ATOM-NEXT: btsq %rsi, (%rdx) # sched: [11:5.50]
-; ATOM-NEXT: btq $7, %rdi # sched: [1:1.00]
-; ATOM-NEXT: btcq $7, %rdi # sched: [1:1.00]
-; ATOM-NEXT: btrq $7, %rdi # sched: [1:1.00]
-; ATOM-NEXT: btsq $7, %rdi # sched: [1:1.00]
-; ATOM-NEXT: btq $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: btcq $7, (%rdx) # sched: [2:1.00]
-; ATOM-NEXT: btrq $7, (%rdx) # sched: [2:1.00]
-; ATOM-NEXT: btsq $7, (%rdx) # sched: [2:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_bt_btc_btr_bts_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: btq %rsi, %rdi # sched: [1:0.50]
-; SLM-NEXT: btcq %rsi, %rdi # sched: [1:0.50]
-; SLM-NEXT: btrq %rsi, %rdi # sched: [1:0.50]
-; SLM-NEXT: btsq %rsi, %rdi # sched: [1:0.50]
-; SLM-NEXT: btq %rsi, (%rdx) # sched: [4:1.00]
-; SLM-NEXT: btcq %rsi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: btrq %rsi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: btsq %rsi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: btq $7, %rdi # sched: [1:0.50]
-; SLM-NEXT: btcq $7, %rdi # sched: [1:0.50]
-; SLM-NEXT: btrq $7, %rdi # sched: [1:0.50]
-; SLM-NEXT: btsq $7, %rdi # sched: [1:0.50]
-; SLM-NEXT: btq $7, (%rdx) # sched: [4:1.00]
-; SLM-NEXT: btcq $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: btrq $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: btsq $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_bt_btc_btr_bts_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: btq %rsi, %rdi # sched: [1:0.50]
-; SANDY-NEXT: btcq %rsi, %rdi # sched: [1:0.50]
-; SANDY-NEXT: btrq %rsi, %rdi # sched: [1:0.50]
-; SANDY-NEXT: btsq %rsi, %rdi # sched: [1:0.50]
-; SANDY-NEXT: btq %rsi, (%rdx) # sched: [9:1.00]
-; SANDY-NEXT: btcq %rsi, (%rdx) # sched: [9:1.00]
-; SANDY-NEXT: btrq %rsi, (%rdx) # sched: [9:1.00]
-; SANDY-NEXT: btsq %rsi, (%rdx) # sched: [9:1.00]
-; SANDY-NEXT: btq $7, %rdi # sched: [1:0.50]
-; SANDY-NEXT: btcq $7, %rdi # sched: [1:0.50]
-; SANDY-NEXT: btrq $7, %rdi # sched: [1:0.50]
-; SANDY-NEXT: btsq $7, %rdi # sched: [1:0.50]
-; SANDY-NEXT: btq $7, (%rdx) # sched: [6:0.50]
-; SANDY-NEXT: btcq $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: btrq $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: btsq $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_bt_btc_btr_bts_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: btq %rsi, %rdi # sched: [1:0.50]
-; HASWELL-NEXT: btcq %rsi, %rdi # sched: [1:0.50]
-; HASWELL-NEXT: btrq %rsi, %rdi # sched: [1:0.50]
-; HASWELL-NEXT: btsq %rsi, %rdi # sched: [1:0.50]
-; HASWELL-NEXT: btq %rsi, (%rdx) # sched: [1:2.50]
-; HASWELL-NEXT: btcq %rsi, (%rdx) # sched: [1:2.75]
-; HASWELL-NEXT: btrq %rsi, (%rdx) # sched: [1:2.75]
-; HASWELL-NEXT: btsq %rsi, (%rdx) # sched: [1:2.75]
-; HASWELL-NEXT: btq $7, %rdi # sched: [1:0.50]
-; HASWELL-NEXT: btcq $7, %rdi # sched: [1:0.50]
-; HASWELL-NEXT: btrq $7, %rdi # sched: [1:0.50]
-; HASWELL-NEXT: btsq $7, %rdi # sched: [1:0.50]
-; HASWELL-NEXT: btq $7, (%rdx) # sched: [6:0.50]
-; HASWELL-NEXT: btcq $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: btrq $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: btsq $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_bt_btc_btr_bts_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: btq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: btcq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: btrq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: btsq %rsi, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: btq %rsi, (%rdx) # sched: [6:0.50]
-; BROADWELL-NEXT: btcq %rsi, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: btrq %rsi, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: btsq %rsi, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: btq $7, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: btcq $7, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: btrq $7, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: btsq $7, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: btq $7, (%rdx) # sched: [6:0.50]
-; BROADWELL-NEXT: btcq $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: btrq $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: btsq $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_bt_btc_btr_bts_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: btq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: btcq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: btrq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: btsq %rsi, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: btq %rsi, (%rdx) # sched: [6:0.50]
-; SKYLAKE-NEXT: btcq %rsi, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: btrq %rsi, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: btsq %rsi, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: btq $7, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: btcq $7, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: btrq $7, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: btsq $7, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: btq $7, (%rdx) # sched: [6:0.50]
-; SKYLAKE-NEXT: btcq $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: btrq $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: btsq $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_bt_btc_btr_bts_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: btq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: btcq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: btrq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: btsq %rsi, %rdi # sched: [1:0.50]
-; SKX-NEXT: btq %rsi, (%rdx) # sched: [6:0.50]
-; SKX-NEXT: btcq %rsi, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: btrq %rsi, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: btsq %rsi, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: btq $7, %rdi # sched: [1:0.50]
-; SKX-NEXT: btcq $7, %rdi # sched: [1:0.50]
-; SKX-NEXT: btrq $7, %rdi # sched: [1:0.50]
-; SKX-NEXT: btsq $7, %rdi # sched: [1:0.50]
-; SKX-NEXT: btq $7, (%rdx) # sched: [6:0.50]
-; SKX-NEXT: btcq $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: btrq $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: btsq $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_bt_btc_btr_bts_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: btq %rsi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: btcq %rsi, %rdi # sched: [2:0.50]
-; BDVER2-NEXT: btrq %rsi, %rdi # sched: [2:0.50]
-; BDVER2-NEXT: btsq %rsi, %rdi # sched: [2:0.50]
-; BDVER2-NEXT: btq %rsi, (%rdx) # sched: [5:0.50]
-; BDVER2-NEXT: btcq %rsi, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: btrq %rsi, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: btsq %rsi, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: btq $7, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: btcq $7, %rdi # sched: [2:0.50]
-; BDVER2-NEXT: btrq $7, %rdi # sched: [2:0.50]
-; BDVER2-NEXT: btsq $7, %rdi # sched: [2:0.50]
-; BDVER2-NEXT: btq $7, (%rdx) # sched: [5:0.50]
-; BDVER2-NEXT: btcq $7, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: btrq $7, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: btsq $7, (%rdx) # sched: [7:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_bt_btc_btr_bts_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: btq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: btcq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: btrq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: btsq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: btq %rsi, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: btcq %rsi, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: btrq %rsi, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: btsq %rsi, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: btq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: btcq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: btrq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: btsq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: btq $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: btcq $7, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: btrq $7, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: btsq $7, (%rdx) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_bt_btc_btr_bts_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: btq %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: btcq %rsi, %rdi # sched: [2:0.25]
-; ZNVER1-NEXT: btrq %rsi, %rdi # sched: [2:0.25]
-; ZNVER1-NEXT: btsq %rsi, %rdi # sched: [2:0.25]
-; ZNVER1-NEXT: btq %rsi, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: btcq %rsi, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: btrq %rsi, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: btsq %rsi, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: btq $7, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: btcq $7, %rdi # sched: [2:0.25]
-; ZNVER1-NEXT: btrq $7, %rdi # sched: [2:0.25]
-; ZNVER1-NEXT: btsq $7, %rdi # sched: [2:0.25]
-; ZNVER1-NEXT: btq $7, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: btcq $7, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: btrq $7, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: btsq $7, (%rdx) # sched: [6:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "btq $1, $0 \0A\09 btcq $1, $0 \0A\09 btrq $1, $0 \0A\09 btsq $1, $0 \0A\09 btq $1, $2 \0A\09 btcq $1, $2 \0A\09 btrq $1, $2 \0A\09 btsq $1, $2 \0A\09 btq $3, $0 \0A\09 btcq $3, $0 \0A\09 btrq $3, $0 \0A\09 btsq $3, $0 \0A\09 btq $3, $2 \0A\09 btcq $3, $2 \0A\09 btrq $3, $2 \0A\09 btsq $3, $2", "r,r,*m,i"(i64 %a0, i64 %a1, i64 *%a2, i8 7)
- ret void
-}
-
-; TODO - test_call
-
-define void @test_cbw_cdq_cdqe_cqo_cwd_cwde() optsize {
-; GENERIC-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: cbtw # sched: [1:0.33]
-; GENERIC-NEXT: cltd # sched: [1:0.50]
-; GENERIC-NEXT: cltq # sched: [1:0.33]
-; GENERIC-NEXT: cqto # sched: [1:0.50]
-; GENERIC-NEXT: cwtd # sched: [2:1.00]
-; GENERIC-NEXT: cwtl # sched: [1:0.33]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: cbtw # sched: [4:2.00]
-; ATOM-NEXT: cltd # sched: [4:2.00]
-; ATOM-NEXT: cltq # sched: [4:2.00]
-; ATOM-NEXT: cqto # sched: [4:2.00]
-; ATOM-NEXT: cwtd # sched: [4:2.00]
-; ATOM-NEXT: cwtl # sched: [4:2.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: cbtw # sched: [1:0.50]
-; SLM-NEXT: cltd # sched: [1:0.50]
-; SLM-NEXT: cltq # sched: [1:0.50]
-; SLM-NEXT: cqto # sched: [1:0.50]
-; SLM-NEXT: cwtd # sched: [1:0.50]
-; SLM-NEXT: cwtl # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: cbtw # sched: [1:0.33]
-; SANDY-NEXT: cltd # sched: [1:0.50]
-; SANDY-NEXT: cltq # sched: [1:0.33]
-; SANDY-NEXT: cqto # sched: [1:0.50]
-; SANDY-NEXT: cwtd # sched: [2:1.00]
-; SANDY-NEXT: cwtl # sched: [1:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: cbtw # sched: [1:0.25]
-; HASWELL-NEXT: cltd # sched: [1:0.50]
-; HASWELL-NEXT: cltq # sched: [1:0.25]
-; HASWELL-NEXT: cqto # sched: [1:0.50]
-; HASWELL-NEXT: cwtd # sched: [2:0.50]
-; HASWELL-NEXT: cwtl # sched: [1:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: cbtw # sched: [1:0.25]
-; BROADWELL-NEXT: cltd # sched: [1:0.50]
-; BROADWELL-NEXT: cltq # sched: [1:0.25]
-; BROADWELL-NEXT: cqto # sched: [1:0.50]
-; BROADWELL-NEXT: cwtd # sched: [2:0.50]
-; BROADWELL-NEXT: cwtl # sched: [1:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: cbtw # sched: [1:0.25]
-; SKYLAKE-NEXT: cltd # sched: [1:0.50]
-; SKYLAKE-NEXT: cltq # sched: [1:0.25]
-; SKYLAKE-NEXT: cqto # sched: [1:0.50]
-; SKYLAKE-NEXT: cwtd # sched: [2:0.50]
-; SKYLAKE-NEXT: cwtl # sched: [1:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: cbtw # sched: [1:0.25]
-; SKX-NEXT: cltd # sched: [1:0.50]
-; SKX-NEXT: cltq # sched: [1:0.25]
-; SKX-NEXT: cqto # sched: [1:0.50]
-; SKX-NEXT: cwtd # sched: [2:0.50]
-; SKX-NEXT: cwtl # sched: [1:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: cbtw # sched: [1:0.50]
-; BDVER2-NEXT: cltd # sched: [1:0.50]
-; BDVER2-NEXT: cltq # sched: [1:0.50]
-; BDVER2-NEXT: cqto # sched: [1:0.50]
-; BDVER2-NEXT: cwtd # sched: [1:0.50]
-; BDVER2-NEXT: cwtl # sched: [1:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: cbtw # sched: [1:0.50]
-; BTVER2-NEXT: cltd # sched: [1:0.50]
-; BTVER2-NEXT: cltq # sched: [1:0.50]
-; BTVER2-NEXT: cqto # sched: [1:0.50]
-; BTVER2-NEXT: cwtd # sched: [1:0.50]
-; BTVER2-NEXT: cwtl # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cbw_cdq_cdqe_cqo_cwd_cwde:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: cbtw # sched: [1:0.25]
-; ZNVER1-NEXT: cltd # sched: [1:0.25]
-; ZNVER1-NEXT: cltq # sched: [1:0.25]
-; ZNVER1-NEXT: cqto # sched: [1:0.25]
-; ZNVER1-NEXT: cwtd # sched: [1:0.25]
-; ZNVER1-NEXT: cwtl # sched: [1:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "cbw \0A\09 cdq \0A\09 cdqe \0A\09 cqo \0A\09 cwd \0A\09 cwde", ""() nounwind
- ret void
-}
-
-define void @test_clc_cld_cmc() optsize {
-; GENERIC-LABEL: test_clc_cld_cmc:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: clc # sched: [1:0.25]
-; GENERIC-NEXT: cld # sched: [1:0.33]
-; GENERIC-NEXT: cmc # sched: [1:0.33]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_clc_cld_cmc:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: clc # sched: [1:0.50]
-; ATOM-NEXT: cld # sched: [3:1.50]
-; ATOM-NEXT: cmc # sched: [1:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_clc_cld_cmc:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: clc # sched: [1:0.50]
-; SLM-NEXT: cld # sched: [1:0.50]
-; SLM-NEXT: cmc # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_clc_cld_cmc:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: clc # sched: [1:0.25]
-; SANDY-NEXT: cld # sched: [1:0.33]
-; SANDY-NEXT: cmc # sched: [1:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_clc_cld_cmc:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: clc # sched: [1:0.25]
-; HASWELL-NEXT: cld # sched: [3:1.00]
-; HASWELL-NEXT: cmc # sched: [1:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_clc_cld_cmc:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: clc # sched: [1:0.25]
-; BROADWELL-NEXT: cld # sched: [3:1.00]
-; BROADWELL-NEXT: cmc # sched: [1:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_clc_cld_cmc:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: clc # sched: [1:0.17]
-; SKYLAKE-NEXT: cld # sched: [3:1.00]
-; SKYLAKE-NEXT: cmc # sched: [1:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_clc_cld_cmc:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: clc # sched: [1:0.17]
-; SKX-NEXT: cld # sched: [3:1.00]
-; SKX-NEXT: cmc # sched: [1:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_clc_cld_cmc:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: clc # sched: [1:0.50]
-; BDVER2-NEXT: cld # sched: [1:0.50]
-; BDVER2-NEXT: cmc # sched: [1:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_clc_cld_cmc:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: clc # sched: [1:0.50]
-; BTVER2-NEXT: cld # sched: [1:0.50]
-; BTVER2-NEXT: cmc # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_clc_cld_cmc:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: clc # sched: [1:0.25]
-; ZNVER1-NEXT: cld # sched: [1:0.25]
-; ZNVER1-NEXT: cmc # sched: [1:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "clc \0A\09 cld \0A\09 cmc", ""() nounwind
- ret void
-}
-
-define void @test_cmp_8(i8 %a0, i8* %a1) optsize {
-; GENERIC-LABEL: test_cmp_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: cmpb $7, %al # sched: [1:0.33]
-; GENERIC-NEXT: cmpb $7, %dil # sched: [1:0.33]
-; GENERIC-NEXT: cmpb $7, (%rsi) # sched: [6:0.50]
-; GENERIC-NEXT: cmpb %dil, %dil # sched: [1:0.33]
-; GENERIC-NEXT: cmpb %dil, (%rsi) # sched: [6:0.50]
-; GENERIC-NEXT: cmpb (%rsi), %dil # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmp_8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: cmpb $7, %al # sched: [1:0.50]
-; ATOM-NEXT: cmpb $7, %dil # sched: [1:0.50]
-; ATOM-NEXT: cmpb $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: cmpb %dil, %dil # sched: [1:0.50]
-; ATOM-NEXT: cmpb %dil, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: cmpb (%rsi), %dil # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmp_8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: cmpb $7, %al # sched: [1:0.50]
-; SLM-NEXT: cmpb $7, %dil # sched: [1:0.50]
-; SLM-NEXT: cmpb $7, (%rsi) # sched: [4:1.00]
-; SLM-NEXT: cmpb %dil, %dil # sched: [1:0.50]
-; SLM-NEXT: cmpb %dil, (%rsi) # sched: [4:1.00]
-; SLM-NEXT: cmpb (%rsi), %dil # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cmp_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: cmpb $7, %al # sched: [1:0.33]
-; SANDY-NEXT: cmpb $7, %dil # sched: [1:0.33]
-; SANDY-NEXT: cmpb $7, (%rsi) # sched: [6:0.50]
-; SANDY-NEXT: cmpb %dil, %dil # sched: [1:0.33]
-; SANDY-NEXT: cmpb %dil, (%rsi) # sched: [6:0.50]
-; SANDY-NEXT: cmpb (%rsi), %dil # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cmp_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: cmpb $7, %al # sched: [1:0.25]
-; HASWELL-NEXT: cmpb $7, %dil # sched: [1:0.25]
-; HASWELL-NEXT: cmpb $7, (%rsi) # sched: [6:0.50]
-; HASWELL-NEXT: cmpb %dil, %dil # sched: [1:0.25]
-; HASWELL-NEXT: cmpb %dil, (%rsi) # sched: [6:0.50]
-; HASWELL-NEXT: cmpb (%rsi), %dil # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmp_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: cmpb $7, %al # sched: [1:0.25]
-; BROADWELL-NEXT: cmpb $7, %dil # sched: [1:0.25]
-; BROADWELL-NEXT: cmpb $7, (%rsi) # sched: [6:0.50]
-; BROADWELL-NEXT: cmpb %dil, %dil # sched: [1:0.25]
-; BROADWELL-NEXT: cmpb %dil, (%rsi) # sched: [6:0.50]
-; BROADWELL-NEXT: cmpb (%rsi), %dil # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmp_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: cmpb $7, %al # sched: [1:0.25]
-; SKYLAKE-NEXT: cmpb $7, %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: cmpb $7, (%rsi) # sched: [6:0.50]
-; SKYLAKE-NEXT: cmpb %dil, %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: cmpb %dil, (%rsi) # sched: [6:0.50]
-; SKYLAKE-NEXT: cmpb (%rsi), %dil # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmp_8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: cmpb $7, %al # sched: [1:0.25]
-; SKX-NEXT: cmpb $7, %dil # sched: [1:0.25]
-; SKX-NEXT: cmpb $7, (%rsi) # sched: [6:0.50]
-; SKX-NEXT: cmpb %dil, %dil # sched: [1:0.25]
-; SKX-NEXT: cmpb %dil, (%rsi) # sched: [6:0.50]
-; SKX-NEXT: cmpb (%rsi), %dil # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cmp_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: cmpb $7, %al # sched: [1:0.50]
-; BDVER2-NEXT: cmpb $7, %dil # sched: [1:0.50]
-; BDVER2-NEXT: cmpb $7, (%rsi) # sched: [5:0.50]
-; BDVER2-NEXT: cmpb %dil, %dil # sched: [1:0.50]
-; BDVER2-NEXT: cmpb %dil, (%rsi) # sched: [5:0.50]
-; BDVER2-NEXT: cmpb (%rsi), %dil # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cmp_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: cmpb $7, %al # sched: [1:0.50]
-; BTVER2-NEXT: cmpb $7, %dil # sched: [1:0.50]
-; BTVER2-NEXT: cmpb $7, (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: cmpb %dil, %dil # sched: [1:0.50]
-; BTVER2-NEXT: cmpb %dil, (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: cmpb (%rsi), %dil # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cmp_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: cmpb $7, %al # sched: [1:0.25]
-; ZNVER1-NEXT: cmpb $7, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: cmpb $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: cmpb %dil, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: cmpb %dil, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: cmpb (%rsi), %dil # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "cmpb $2, %AL \0A\09 cmpb $2, $0 \0A\09 cmpb $2, $1 \0A\09 cmpb $0, $0 \0A\09 cmpb $0, $1 \0A\09 cmpb $1, $0", "r,*m,i"(i8 %a0, i8* %a1, i8 7) nounwind
- ret void
-}
-define void @test_cmp_16(i16 %a0, i16* %a1) optsize {
-; GENERIC-LABEL: test_cmp_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: cmpw $511, %ax # imm = 0x1FF
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: cmpw $511, %di # imm = 0x1FF
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: cmpw $511, (%rsi) # imm = 0x1FF
-; GENERIC-NEXT: # sched: [6:0.50]
-; GENERIC-NEXT: cmpw $7, %di # sched: [1:0.33]
-; GENERIC-NEXT: cmpw $7, (%rsi) # sched: [6:0.50]
-; GENERIC-NEXT: cmpw %di, %di # sched: [1:0.33]
-; GENERIC-NEXT: cmpw %di, (%rsi) # sched: [6:0.50]
-; GENERIC-NEXT: cmpw (%rsi), %di # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmp_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: cmpw $511, %ax # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: cmpw $511, %di # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: cmpw $511, (%rsi) # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: cmpw $7, %di # sched: [1:0.50]
-; ATOM-NEXT: cmpw $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: cmpw %di, %di # sched: [1:0.50]
-; ATOM-NEXT: cmpw %di, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: cmpw (%rsi), %di # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmp_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: cmpw $511, %ax # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: cmpw $511, %di # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: cmpw $511, (%rsi) # imm = 0x1FF
-; SLM-NEXT: # sched: [4:1.00]
-; SLM-NEXT: cmpw $7, %di # sched: [1:0.50]
-; SLM-NEXT: cmpw $7, (%rsi) # sched: [4:1.00]
-; SLM-NEXT: cmpw %di, %di # sched: [1:0.50]
-; SLM-NEXT: cmpw %di, (%rsi) # sched: [4:1.00]
-; SLM-NEXT: cmpw (%rsi), %di # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cmp_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: cmpw $511, %ax # imm = 0x1FF
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: cmpw $511, %di # imm = 0x1FF
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: cmpw $511, (%rsi) # imm = 0x1FF
-; SANDY-NEXT: # sched: [6:0.50]
-; SANDY-NEXT: cmpw $7, %di # sched: [1:0.33]
-; SANDY-NEXT: cmpw $7, (%rsi) # sched: [6:0.50]
-; SANDY-NEXT: cmpw %di, %di # sched: [1:0.33]
-; SANDY-NEXT: cmpw %di, (%rsi) # sched: [6:0.50]
-; SANDY-NEXT: cmpw (%rsi), %di # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cmp_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: cmpw $511, %ax # imm = 0x1FF
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: cmpw $511, %di # imm = 0x1FF
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: cmpw $511, (%rsi) # imm = 0x1FF
-; HASWELL-NEXT: # sched: [6:0.50]
-; HASWELL-NEXT: cmpw $7, %di # sched: [1:0.25]
-; HASWELL-NEXT: cmpw $7, (%rsi) # sched: [6:0.50]
-; HASWELL-NEXT: cmpw %di, %di # sched: [1:0.25]
-; HASWELL-NEXT: cmpw %di, (%rsi) # sched: [6:0.50]
-; HASWELL-NEXT: cmpw (%rsi), %di # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmp_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: cmpw $511, %ax # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: cmpw $511, %di # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: cmpw $511, (%rsi) # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [6:0.50]
-; BROADWELL-NEXT: cmpw $7, %di # sched: [1:0.25]
-; BROADWELL-NEXT: cmpw $7, (%rsi) # sched: [6:0.50]
-; BROADWELL-NEXT: cmpw %di, %di # sched: [1:0.25]
-; BROADWELL-NEXT: cmpw %di, (%rsi) # sched: [6:0.50]
-; BROADWELL-NEXT: cmpw (%rsi), %di # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmp_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: cmpw $511, %ax # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: cmpw $511, %di # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: cmpw $511, (%rsi) # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [6:0.50]
-; SKYLAKE-NEXT: cmpw $7, %di # sched: [1:0.25]
-; SKYLAKE-NEXT: cmpw $7, (%rsi) # sched: [6:0.50]
-; SKYLAKE-NEXT: cmpw %di, %di # sched: [1:0.25]
-; SKYLAKE-NEXT: cmpw %di, (%rsi) # sched: [6:0.50]
-; SKYLAKE-NEXT: cmpw (%rsi), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmp_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: cmpw $511, %ax # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: cmpw $511, %di # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: cmpw $511, (%rsi) # imm = 0x1FF
-; SKX-NEXT: # sched: [6:0.50]
-; SKX-NEXT: cmpw $7, %di # sched: [1:0.25]
-; SKX-NEXT: cmpw $7, (%rsi) # sched: [6:0.50]
-; SKX-NEXT: cmpw %di, %di # sched: [1:0.25]
-; SKX-NEXT: cmpw %di, (%rsi) # sched: [6:0.50]
-; SKX-NEXT: cmpw (%rsi), %di # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cmp_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: cmpw $511, %ax # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: cmpw $511, %di # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: cmpw $511, (%rsi) # imm = 0x1FF
-; BDVER2-NEXT: # sched: [5:0.50]
-; BDVER2-NEXT: cmpw $7, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmpw $7, (%rsi) # sched: [5:0.50]
-; BDVER2-NEXT: cmpw %di, %di # sched: [1:0.50]
-; BDVER2-NEXT: cmpw %di, (%rsi) # sched: [5:0.50]
-; BDVER2-NEXT: cmpw (%rsi), %di # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cmp_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: cmpw $511, %ax # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: cmpw $511, %di # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: cmpw $511, (%rsi) # imm = 0x1FF
-; BTVER2-NEXT: # sched: [4:1.00]
-; BTVER2-NEXT: cmpw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmpw $7, (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: cmpw %di, %di # sched: [1:0.50]
-; BTVER2-NEXT: cmpw %di, (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: cmpw (%rsi), %di # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cmp_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: cmpw $511, %ax # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: cmpw $511, %di # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: cmpw $511, (%rsi) # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: cmpw $7, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmpw $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: cmpw %di, %di # sched: [1:0.25]
-; ZNVER1-NEXT: cmpw %di, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: cmpw (%rsi), %di # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "cmpw $2, %AX \0A\09 cmpw $2, $0 \0A\09 cmpw $2, $1 \0A\09 cmpw $3, $0 \0A\09 cmpw $3, $1 \0A\09 cmpw $0, $0 \0A\09 cmpw $0, $1 \0A\09 cmpw $1, $0", "r,*m,i,i"(i16 %a0, i16* %a1, i16 511, i8 7) nounwind
- ret void
-}
-define void @test_cmp_32(i32 %a0, i32* %a1) optsize {
-; GENERIC-LABEL: test_cmp_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: cmpl $665536, %eax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: cmpl $665536, %edi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [6:0.50]
-; GENERIC-NEXT: cmpl $7, %edi # sched: [1:0.33]
-; GENERIC-NEXT: cmpl $7, (%rsi) # sched: [6:0.50]
-; GENERIC-NEXT: cmpl %edi, %edi # sched: [1:0.33]
-; GENERIC-NEXT: cmpl %edi, (%rsi) # sched: [6:0.50]
-; GENERIC-NEXT: cmpl (%rsi), %edi # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmp_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: cmpl $665536, %eax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: cmpl $665536, %edi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: cmpl $7, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmpl $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: cmpl %edi, %edi # sched: [1:0.50]
-; ATOM-NEXT: cmpl %edi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: cmpl (%rsi), %edi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmp_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: cmpl $665536, %eax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: cmpl $665536, %edi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [4:1.00]
-; SLM-NEXT: cmpl $7, %edi # sched: [1:0.50]
-; SLM-NEXT: cmpl $7, (%rsi) # sched: [4:1.00]
-; SLM-NEXT: cmpl %edi, %edi # sched: [1:0.50]
-; SLM-NEXT: cmpl %edi, (%rsi) # sched: [4:1.00]
-; SLM-NEXT: cmpl (%rsi), %edi # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cmp_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: cmpl $665536, %eax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: cmpl $665536, %edi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [6:0.50]
-; SANDY-NEXT: cmpl $7, %edi # sched: [1:0.33]
-; SANDY-NEXT: cmpl $7, (%rsi) # sched: [6:0.50]
-; SANDY-NEXT: cmpl %edi, %edi # sched: [1:0.33]
-; SANDY-NEXT: cmpl %edi, (%rsi) # sched: [6:0.50]
-; SANDY-NEXT: cmpl (%rsi), %edi # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cmp_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: cmpl $665536, %eax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: cmpl $665536, %edi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [6:0.50]
-; HASWELL-NEXT: cmpl $7, %edi # sched: [1:0.25]
-; HASWELL-NEXT: cmpl $7, (%rsi) # sched: [6:0.50]
-; HASWELL-NEXT: cmpl %edi, %edi # sched: [1:0.25]
-; HASWELL-NEXT: cmpl %edi, (%rsi) # sched: [6:0.50]
-; HASWELL-NEXT: cmpl (%rsi), %edi # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmp_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: cmpl $665536, %eax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: cmpl $665536, %edi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [6:0.50]
-; BROADWELL-NEXT: cmpl $7, %edi # sched: [1:0.25]
-; BROADWELL-NEXT: cmpl $7, (%rsi) # sched: [6:0.50]
-; BROADWELL-NEXT: cmpl %edi, %edi # sched: [1:0.25]
-; BROADWELL-NEXT: cmpl %edi, (%rsi) # sched: [6:0.50]
-; BROADWELL-NEXT: cmpl (%rsi), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmp_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: cmpl $665536, %eax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: cmpl $665536, %edi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [6:0.50]
-; SKYLAKE-NEXT: cmpl $7, %edi # sched: [1:0.25]
-; SKYLAKE-NEXT: cmpl $7, (%rsi) # sched: [6:0.50]
-; SKYLAKE-NEXT: cmpl %edi, %edi # sched: [1:0.25]
-; SKYLAKE-NEXT: cmpl %edi, (%rsi) # sched: [6:0.50]
-; SKYLAKE-NEXT: cmpl (%rsi), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmp_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: cmpl $665536, %eax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: cmpl $665536, %edi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [6:0.50]
-; SKX-NEXT: cmpl $7, %edi # sched: [1:0.25]
-; SKX-NEXT: cmpl $7, (%rsi) # sched: [6:0.50]
-; SKX-NEXT: cmpl %edi, %edi # sched: [1:0.25]
-; SKX-NEXT: cmpl %edi, (%rsi) # sched: [6:0.50]
-; SKX-NEXT: cmpl (%rsi), %edi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cmp_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: cmpl $665536, %eax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: cmpl $665536, %edi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [5:0.50]
-; BDVER2-NEXT: cmpl $7, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmpl $7, (%rsi) # sched: [5:0.50]
-; BDVER2-NEXT: cmpl %edi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: cmpl %edi, (%rsi) # sched: [5:0.50]
-; BDVER2-NEXT: cmpl (%rsi), %edi # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cmp_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: cmpl $665536, %eax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: cmpl $665536, %edi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [4:1.00]
-; BTVER2-NEXT: cmpl $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmpl $7, (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: cmpl %edi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: cmpl %edi, (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: cmpl (%rsi), %edi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cmp_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: cmpl $665536, %eax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: cmpl $665536, %edi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: cmpl $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: cmpl $7, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmpl $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: cmpl %edi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: cmpl %edi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: cmpl (%rsi), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "cmpl $2, %EAX \0A\09 cmpl $2, $0 \0A\09 cmpl $2, $1 \0A\09 cmpl $3, $0 \0A\09 cmpl $3, $1 \0A\09 cmpl $0, $0 \0A\09 cmpl $0, $1 \0A\09 cmpl $1, $0", "r,*m,i,i"(i32 %a0, i32* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-define void @test_cmp_64(i64 %a0, i64* %a1) optsize {
-; GENERIC-LABEL: test_cmp_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: cmpq $665536, %rax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: cmpq $665536, %rdi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [6:0.50]
-; GENERIC-NEXT: cmpq $7, %rdi # sched: [1:0.33]
-; GENERIC-NEXT: cmpq $7, (%rsi) # sched: [6:0.50]
-; GENERIC-NEXT: cmpq %rdi, %rdi # sched: [1:0.33]
-; GENERIC-NEXT: cmpq %rdi, (%rsi) # sched: [6:0.50]
-; GENERIC-NEXT: cmpq (%rsi), %rdi # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmp_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: cmpq $665536, %rax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: cmpq $665536, %rdi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: cmpq $7, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmpq $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: cmpq %rdi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: cmpq %rdi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: cmpq (%rsi), %rdi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmp_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: cmpq $665536, %rax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: cmpq $665536, %rdi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [4:1.00]
-; SLM-NEXT: cmpq $7, %rdi # sched: [1:0.50]
-; SLM-NEXT: cmpq $7, (%rsi) # sched: [4:1.00]
-; SLM-NEXT: cmpq %rdi, %rdi # sched: [1:0.50]
-; SLM-NEXT: cmpq %rdi, (%rsi) # sched: [4:1.00]
-; SLM-NEXT: cmpq (%rsi), %rdi # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cmp_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: cmpq $665536, %rax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: cmpq $665536, %rdi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [6:0.50]
-; SANDY-NEXT: cmpq $7, %rdi # sched: [1:0.33]
-; SANDY-NEXT: cmpq $7, (%rsi) # sched: [6:0.50]
-; SANDY-NEXT: cmpq %rdi, %rdi # sched: [1:0.33]
-; SANDY-NEXT: cmpq %rdi, (%rsi) # sched: [6:0.50]
-; SANDY-NEXT: cmpq (%rsi), %rdi # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cmp_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: cmpq $665536, %rax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: cmpq $665536, %rdi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [6:0.50]
-; HASWELL-NEXT: cmpq $7, %rdi # sched: [1:0.25]
-; HASWELL-NEXT: cmpq $7, (%rsi) # sched: [6:0.50]
-; HASWELL-NEXT: cmpq %rdi, %rdi # sched: [1:0.25]
-; HASWELL-NEXT: cmpq %rdi, (%rsi) # sched: [6:0.50]
-; HASWELL-NEXT: cmpq (%rsi), %rdi # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmp_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: cmpq $665536, %rax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: cmpq $665536, %rdi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [6:0.50]
-; BROADWELL-NEXT: cmpq $7, %rdi # sched: [1:0.25]
-; BROADWELL-NEXT: cmpq $7, (%rsi) # sched: [6:0.50]
-; BROADWELL-NEXT: cmpq %rdi, %rdi # sched: [1:0.25]
-; BROADWELL-NEXT: cmpq %rdi, (%rsi) # sched: [6:0.50]
-; BROADWELL-NEXT: cmpq (%rsi), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmp_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: cmpq $665536, %rax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: cmpq $665536, %rdi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [6:0.50]
-; SKYLAKE-NEXT: cmpq $7, %rdi # sched: [1:0.25]
-; SKYLAKE-NEXT: cmpq $7, (%rsi) # sched: [6:0.50]
-; SKYLAKE-NEXT: cmpq %rdi, %rdi # sched: [1:0.25]
-; SKYLAKE-NEXT: cmpq %rdi, (%rsi) # sched: [6:0.50]
-; SKYLAKE-NEXT: cmpq (%rsi), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmp_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: cmpq $665536, %rax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: cmpq $665536, %rdi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [6:0.50]
-; SKX-NEXT: cmpq $7, %rdi # sched: [1:0.25]
-; SKX-NEXT: cmpq $7, (%rsi) # sched: [6:0.50]
-; SKX-NEXT: cmpq %rdi, %rdi # sched: [1:0.25]
-; SKX-NEXT: cmpq %rdi, (%rsi) # sched: [6:0.50]
-; SKX-NEXT: cmpq (%rsi), %rdi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cmp_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: cmpq $665536, %rax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: cmpq $665536, %rdi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [5:0.50]
-; BDVER2-NEXT: cmpq $7, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmpq $7, (%rsi) # sched: [5:0.50]
-; BDVER2-NEXT: cmpq %rdi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: cmpq %rdi, (%rsi) # sched: [5:0.50]
-; BDVER2-NEXT: cmpq (%rsi), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cmp_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: cmpq $665536, %rax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: cmpq $665536, %rdi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [4:1.00]
-; BTVER2-NEXT: cmpq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmpq $7, (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: cmpq %rdi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: cmpq %rdi, (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: cmpq (%rsi), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cmp_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: cmpq $665536, %rax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: cmpq $665536, %rdi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: cmpq $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: cmpq $7, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmpq $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: cmpq %rdi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: cmpq %rdi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: cmpq (%rsi), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "cmpq $2, %RAX \0A\09 cmpq $2, $0 \0A\09 cmpq $2, $1 \0A\09 cmpq $3, $0 \0A\09 cmpq $3, $1 \0A\09 cmpq $0, $0 \0A\09 cmpq $0, $1 \0A\09 cmpq $1, $0", "r,*m,i,i"(i64 %a0, i64* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-
-define void @test_cmps() optsize {
-; GENERIC-LABEL: test_cmps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [8:1.00]
-; GENERIC-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [8:1.00]
-; GENERIC-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [8:1.00]
-; GENERIC-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [3:1.50]
-; ATOM-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [3:1.50]
-; ATOM-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [3:1.50]
-; ATOM-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [3:1.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmps:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [100:1.00]
-; SLM-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [100:1.00]
-; SLM-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [100:1.00]
-; SLM-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cmps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [8:1.00]
-; SANDY-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [8:1.00]
-; SANDY-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [8:1.00]
-; SANDY-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cmps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [4:1.00]
-; HASWELL-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [4:1.00]
-; HASWELL-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [4:1.00]
-; HASWELL-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [4:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [100:0.25]
-; BROADWELL-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [100:0.25]
-; BROADWELL-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [100:0.25]
-; BROADWELL-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [100:0.25]
-; SKYLAKE-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [100:0.25]
-; SKYLAKE-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [100:0.25]
-; SKYLAKE-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmps:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [100:0.25]
-; SKX-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [100:0.25]
-; SKX-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [100:0.25]
-; SKX-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cmps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [100:0.50]
-; BDVER2-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [100:0.50]
-; BDVER2-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [100:0.50]
-; BDVER2-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cmps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [100:0.50]
-; BTVER2-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [100:0.50]
-; BTVER2-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [100:0.50]
-; BTVER2-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cmps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: cmpsb %es:(%rdi), (%rsi) # sched: [100:0.25]
-; ZNVER1-NEXT: cmpsw %es:(%rdi), (%rsi) # sched: [100:0.25]
-; ZNVER1-NEXT: cmpsl %es:(%rdi), (%rsi) # sched: [100:0.25]
-; ZNVER1-NEXT: cmpsq %es:(%rdi), (%rsi) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "cmpsb \0A\09 cmpsw \0A\09 cmpsl \0A\09 cmpsq", ""()
- ret void
-}
-
-define void @test_cmpxchg_8(i8 %a0, i8 %a1, i8 *%a2) optsize {
-; GENERIC-LABEL: test_cmpxchg_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: cmpxchgb %dil, %sil # sched: [5:1.33]
-; GENERIC-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:2.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmpxchg_8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: cmpxchgb %dil, %sil # sched: [9:4.50]
-; ATOM-NEXT: cmpxchgb %dil, (%rdx) # sched: [6:3.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmpxchg_8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: cmpxchgb %dil, %sil # sched: [1:0.50]
-; SLM-NEXT: cmpxchgb %dil, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cmpxchg_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: cmpxchgb %dil, %sil # sched: [5:1.33]
-; SANDY-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:2.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cmpxchg_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: cmpxchgb %dil, %sil # sched: [5:1.25]
-; HASWELL-NEXT: cmpxchgb %dil, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmpxchg_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: cmpxchgb %dil, %sil # sched: [5:1.25]
-; BROADWELL-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmpxchg_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: cmpxchgb %dil, %sil # sched: [5:1.25]
-; SKYLAKE-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmpxchg_8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: cmpxchgb %dil, %sil # sched: [5:1.25]
-; SKX-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cmpxchg_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: cmpxchgb %dil, %sil # sched: [3:1.00]
-; BDVER2-NEXT: cmpxchgb %dil, (%rdx) # sched: [3:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cmpxchg_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: cmpxchgb %dil, %sil # sched: [1:0.50]
-; BTVER2-NEXT: cmpxchgb %dil, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cmpxchg_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: cmpxchgb %dil, %sil # sched: [1:0.25]
-; ZNVER1-NEXT: cmpxchgb %dil, (%rdx) # sched: [8:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "cmpxchgb $0, $1 \0a\09 cmpxchgb $0, $2", "r,r,*m"(i8 %a0, i8 %a1, i8 *%a2) nounwind
- ret void
-}
-define void @test_cmpxchg_16(i16 %a0, i16 %a1, i16 *%a2) optsize {
-; GENERIC-LABEL: test_cmpxchg_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: cmpxchgw %di, %si # sched: [5:1.33]
-; GENERIC-NEXT: cmpxchgw %di, (%rdx) # sched: [8:2.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmpxchg_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: cmpxchgw %di, %si # sched: [15:7.50]
-; ATOM-NEXT: cmpxchgw %di, (%rdx) # sched: [14:7.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmpxchg_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: cmpxchgw %di, %si # sched: [1:0.50]
-; SLM-NEXT: cmpxchgw %di, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cmpxchg_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: cmpxchgw %di, %si # sched: [5:1.33]
-; SANDY-NEXT: cmpxchgw %di, (%rdx) # sched: [8:2.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cmpxchg_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: cmpxchgw %di, %si # sched: [5:1.25]
-; HASWELL-NEXT: cmpxchgw %di, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmpxchg_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: cmpxchgw %di, %si # sched: [5:1.25]
-; BROADWELL-NEXT: cmpxchgw %di, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmpxchg_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: cmpxchgw %di, %si # sched: [5:1.25]
-; SKYLAKE-NEXT: cmpxchgw %di, (%rdx) # sched: [8:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmpxchg_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: cmpxchgw %di, %si # sched: [5:1.25]
-; SKX-NEXT: cmpxchgw %di, (%rdx) # sched: [8:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cmpxchg_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: cmpxchgw %di, %si # sched: [3:1.00]
-; BDVER2-NEXT: cmpxchgw %di, (%rdx) # sched: [3:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cmpxchg_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: cmpxchgw %di, %si # sched: [1:0.50]
-; BTVER2-NEXT: cmpxchgw %di, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cmpxchg_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: cmpxchgw %di, %si # sched: [1:0.25]
-; ZNVER1-NEXT: cmpxchgw %di, (%rdx) # sched: [8:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "cmpxchgw $0, $1 \0a\09 cmpxchgw $0, $2", "r,r,*m"(i16 %a0, i16 %a1, i16 *%a2) nounwind
- ret void
-}
-define void @test_cmpxchg_32(i32 %a0, i32 %a1, i32 *%a2) optsize {
-; GENERIC-LABEL: test_cmpxchg_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: cmpxchgl %edi, %esi # sched: [5:1.33]
-; GENERIC-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:2.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmpxchg_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: cmpxchgl %edi, %esi # sched: [15:7.50]
-; ATOM-NEXT: cmpxchgl %edi, (%rdx) # sched: [14:7.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmpxchg_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: cmpxchgl %edi, %esi # sched: [1:0.50]
-; SLM-NEXT: cmpxchgl %edi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cmpxchg_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: cmpxchgl %edi, %esi # sched: [5:1.33]
-; SANDY-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:2.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cmpxchg_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: cmpxchgl %edi, %esi # sched: [5:1.25]
-; HASWELL-NEXT: cmpxchgl %edi, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmpxchg_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: cmpxchgl %edi, %esi # sched: [5:1.25]
-; BROADWELL-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmpxchg_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: cmpxchgl %edi, %esi # sched: [5:1.25]
-; SKYLAKE-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmpxchg_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: cmpxchgl %edi, %esi # sched: [5:1.25]
-; SKX-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cmpxchg_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: cmpxchgl %edi, %esi # sched: [3:1.00]
-; BDVER2-NEXT: cmpxchgl %edi, (%rdx) # sched: [3:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cmpxchg_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: cmpxchgl %edi, %esi # sched: [1:0.50]
-; BTVER2-NEXT: cmpxchgl %edi, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cmpxchg_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: cmpxchgl %edi, %esi # sched: [1:0.25]
-; ZNVER1-NEXT: cmpxchgl %edi, (%rdx) # sched: [8:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "cmpxchgl $0, $1 \0a\09 cmpxchgl $0, $2", "r,r,*m"(i32 %a0, i32 %a1, i32 *%a2) nounwind
- ret void
-}
-define void @test_cmpxchg_64(i64 %a0, i64 %a1, i64 *%a2) optsize {
-; GENERIC-LABEL: test_cmpxchg_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.33]
-; GENERIC-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:2.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmpxchg_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: cmpxchgq %rdi, %rsi # sched: [15:7.50]
-; ATOM-NEXT: cmpxchgq %rdi, (%rdx) # sched: [14:7.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmpxchg_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: cmpxchgq %rdi, %rsi # sched: [1:0.50]
-; SLM-NEXT: cmpxchgq %rdi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cmpxchg_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.33]
-; SANDY-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:2.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cmpxchg_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.25]
-; HASWELL-NEXT: cmpxchgq %rdi, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmpxchg_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.25]
-; BROADWELL-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmpxchg_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.25]
-; SKYLAKE-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmpxchg_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: cmpxchgq %rdi, %rsi # sched: [5:1.25]
-; SKX-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cmpxchg_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: cmpxchgq %rdi, %rsi # sched: [3:1.00]
-; BDVER2-NEXT: cmpxchgq %rdi, (%rdx) # sched: [3:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cmpxchg_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: cmpxchgq %rdi, %rsi # sched: [1:0.50]
-; BTVER2-NEXT: cmpxchgq %rdi, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cmpxchg_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: cmpxchgq %rdi, %rsi # sched: [1:0.25]
-; ZNVER1-NEXT: cmpxchgq %rdi, (%rdx) # sched: [8:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "cmpxchgq $0, $1 \0a\09 cmpxchgq $0, $2", "r,r,*m"(i64 %a0, i64 %a1, i64 *%a2) nounwind
- ret void
-}
-define void @test_cmpxchg8b_cmpxchg16b(i8 *%a0) optsize {
-; GENERIC-LABEL: test_cmpxchg8b_cmpxchg16b:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: cmpxchg8b (%rdi) # sched: [6:1.00]
-; GENERIC-NEXT: cmpxchg16b (%rdi) # sched: [6:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmpxchg8b_cmpxchg16b:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: cmpxchg8b (%rdi) # sched: [18:9.00]
-; ATOM-NEXT: cmpxchg16b (%rdi) # sched: [22:11.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmpxchg8b_cmpxchg16b:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: cmpxchg8b (%rdi) # sched: [4:2.00]
-; SLM-NEXT: cmpxchg16b (%rdi) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cmpxchg8b_cmpxchg16b:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: cmpxchg8b (%rdi) # sched: [6:1.00]
-; SANDY-NEXT: cmpxchg16b (%rdi) # sched: [6:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cmpxchg8b_cmpxchg16b:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: cmpxchg8b (%rdi) # sched: [17:2.75]
-; HASWELL-NEXT: cmpxchg16b (%rdi) # sched: [22:4.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmpxchg8b_cmpxchg16b:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: cmpxchg8b (%rdi) # sched: [16:2.75]
-; BROADWELL-NEXT: cmpxchg16b (%rdi) # sched: [21:4.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmpxchg8b_cmpxchg16b:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: cmpxchg8b (%rdi) # sched: [16:2.75]
-; SKYLAKE-NEXT: cmpxchg16b (%rdi) # sched: [23:4.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmpxchg8b_cmpxchg16b:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: cmpxchg8b (%rdi) # sched: [16:2.75]
-; SKX-NEXT: cmpxchg16b (%rdi) # sched: [23:4.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cmpxchg8b_cmpxchg16b:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: cmpxchg8b (%rdi) # sched: [3:1.00]
-; BDVER2-NEXT: cmpxchg16b (%rdi) # sched: [3:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cmpxchg8b_cmpxchg16b:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: cmpxchg8b (%rdi) # sched: [4:1.00]
-; BTVER2-NEXT: cmpxchg16b (%rdi) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cmpxchg8b_cmpxchg16b:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: cmpxchg8b (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT: cmpxchg16b (%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "cmpxchg8b $0 \0a\09 cmpxchg16b $0", "*m"(i8 *%a0) nounwind
- ret void
-}
-
-define void @test_cpuid() optsize {
-; GENERIC-LABEL: test_cpuid:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: cpuid # sched: [100:0.33]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cpuid:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: cpuid # sched: [121:60.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cpuid:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: cpuid # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_cpuid:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: cpuid # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_cpuid:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: cpuid # sched: [18:2.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cpuid:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: cpuid # sched: [18:2.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cpuid:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: cpuid # sched: [18:2.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cpuid:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: cpuid # sched: [18:2.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_cpuid:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: cpuid # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_cpuid:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: cpuid # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_cpuid:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: cpuid # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "cpuid", ""() nounwind
- ret void
-}
-
-define void @test_dec8(i8 %a0, i8* %a1) optsize {
-; GENERIC-LABEL: test_dec8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: decb %dil # sched: [1:0.33]
-; GENERIC-NEXT: decb (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_dec8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: decb %dil # sched: [1:0.50]
-; ATOM-NEXT: decb (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_dec8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: decb %dil # sched: [1:0.50]
-; SLM-NEXT: decb (%rsi) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_dec8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: decb %dil # sched: [1:0.33]
-; SANDY-NEXT: decb (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_dec8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: decb %dil # sched: [1:0.25]
-; HASWELL-NEXT: decb (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_dec8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: decb %dil # sched: [1:0.25]
-; BROADWELL-NEXT: decb (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_dec8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: decb %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: decb (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_dec8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: decb %dil # sched: [1:0.25]
-; SKX-NEXT: decb (%rsi) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_dec8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: decb %dil # sched: [1:0.50]
-; BDVER2-NEXT: decb (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_dec8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: decb %dil # sched: [1:0.50]
-; BTVER2-NEXT: decb (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_dec8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: decb %dil # sched: [1:0.25]
-; ZNVER1-NEXT: decb (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "decb $0 \0A\09 decb $1", "r,*m"(i8 %a0, i8* %a1) nounwind
- ret void
-}
-define void @test_dec16(i16 %a0, i16* %a1) optsize {
-; GENERIC-LABEL: test_dec16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: decw %di # sched: [1:0.33]
-; GENERIC-NEXT: decw (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_dec16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: decw %di # sched: [1:0.50]
-; ATOM-NEXT: decw (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_dec16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: decw %di # sched: [1:0.50]
-; SLM-NEXT: decw (%rsi) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_dec16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: decw %di # sched: [1:0.33]
-; SANDY-NEXT: decw (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_dec16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: decw %di # sched: [1:0.25]
-; HASWELL-NEXT: decw (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_dec16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: decw %di # sched: [1:0.25]
-; BROADWELL-NEXT: decw (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_dec16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: decw %di # sched: [1:0.25]
-; SKYLAKE-NEXT: decw (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_dec16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: decw %di # sched: [1:0.25]
-; SKX-NEXT: decw (%rsi) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_dec16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: decw %di # sched: [1:0.50]
-; BDVER2-NEXT: decw (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_dec16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: decw %di # sched: [1:0.50]
-; BTVER2-NEXT: decw (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_dec16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: decw %di # sched: [1:0.25]
-; ZNVER1-NEXT: decw (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "decw $0 \0A\09 decw $1", "r,*m"(i16 %a0, i16* %a1) nounwind
- ret void
-}
-define void @test_dec32(i32 %a0, i32* %a1) optsize {
-; GENERIC-LABEL: test_dec32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: decl %edi # sched: [1:0.33]
-; GENERIC-NEXT: decl (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_dec32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: decl %edi # sched: [1:0.50]
-; ATOM-NEXT: decl (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_dec32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: decl %edi # sched: [1:0.50]
-; SLM-NEXT: decl (%rsi) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_dec32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: decl %edi # sched: [1:0.33]
-; SANDY-NEXT: decl (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_dec32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: decl %edi # sched: [1:0.25]
-; HASWELL-NEXT: decl (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_dec32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: decl %edi # sched: [1:0.25]
-; BROADWELL-NEXT: decl (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_dec32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: decl %edi # sched: [1:0.25]
-; SKYLAKE-NEXT: decl (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_dec32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: decl %edi # sched: [1:0.25]
-; SKX-NEXT: decl (%rsi) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_dec32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: decl %edi # sched: [1:0.50]
-; BDVER2-NEXT: decl (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_dec32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: decl %edi # sched: [1:0.50]
-; BTVER2-NEXT: decl (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_dec32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: decl %edi # sched: [1:0.25]
-; ZNVER1-NEXT: decl (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "decl $0 \0A\09 decl $1", "r,*m"(i32 %a0, i32* %a1) nounwind
- ret void
-}
-define void @test_dec64(i64 %a0, i64* %a1) optsize {
-; GENERIC-LABEL: test_dec64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: decq %rdi # sched: [1:0.33]
-; GENERIC-NEXT: decq (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_dec64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: decq %rdi # sched: [1:0.50]
-; ATOM-NEXT: decq (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_dec64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: decq %rdi # sched: [1:0.50]
-; SLM-NEXT: decq (%rsi) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_dec64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: decq %rdi # sched: [1:0.33]
-; SANDY-NEXT: decq (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_dec64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: decq %rdi # sched: [1:0.25]
-; HASWELL-NEXT: decq (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_dec64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: decq %rdi # sched: [1:0.25]
-; BROADWELL-NEXT: decq (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_dec64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: decq %rdi # sched: [1:0.25]
-; SKYLAKE-NEXT: decq (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_dec64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: decq %rdi # sched: [1:0.25]
-; SKX-NEXT: decq (%rsi) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_dec64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: decq %rdi # sched: [1:0.50]
-; BDVER2-NEXT: decq (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_dec64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: decq %rdi # sched: [1:0.50]
-; BTVER2-NEXT: decq (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_dec64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: decq %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: decq (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "decq $0 \0A\09 decq $1", "r,*m"(i64 %a0, i64* %a1) nounwind
- ret void
-}
-
-define void @test_div(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) optsize {
-; GENERIC-LABEL: test_div:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: divb %dil # sched: [25:10.00]
-; GENERIC-NEXT: divb (%r8) # sched: [30:10.00]
-; GENERIC-NEXT: divw %si # sched: [25:10.00]
-; GENERIC-NEXT: divw (%r9) # sched: [30:10.00]
-; GENERIC-NEXT: divl %edx # sched: [25:10.00]
-; GENERIC-NEXT: divl (%rax) # sched: [30:10.00]
-; GENERIC-NEXT: divq %rcx # sched: [25:10.00]
-; GENERIC-NEXT: divq (%r10) # sched: [30:10.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_div:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [1:1.00]
-; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: divb %dil # sched: [50:25.00]
-; ATOM-NEXT: divb (%r8) # sched: [68:34.00]
-; ATOM-NEXT: divw %si # sched: [50:25.00]
-; ATOM-NEXT: divw (%r9) # sched: [50:25.00]
-; ATOM-NEXT: divl %edx # sched: [50:25.00]
-; ATOM-NEXT: divl (%rax) # sched: [50:25.00]
-; ATOM-NEXT: divq %rcx # sched: [130:65.00]
-; ATOM-NEXT: divq (%r10) # sched: [130:65.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_div:
-; SLM: # %bb.0:
-; SLM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00]
-; SLM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: divb %dil # sched: [25:25.00]
-; SLM-NEXT: divb (%r8) # sched: [29:25.00]
-; SLM-NEXT: divw %si # sched: [25:25.00]
-; SLM-NEXT: divw (%r9) # sched: [29:25.00]
-; SLM-NEXT: divl %edx # sched: [25:25.00]
-; SLM-NEXT: divl (%rax) # sched: [29:25.00]
-; SLM-NEXT: divq %rcx # sched: [25:25.00]
-; SLM-NEXT: divq (%r10) # sched: [29:25.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_div:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: divb %dil # sched: [25:10.00]
-; SANDY-NEXT: divb (%r8) # sched: [30:10.00]
-; SANDY-NEXT: divw %si # sched: [25:10.00]
-; SANDY-NEXT: divw (%r9) # sched: [30:10.00]
-; SANDY-NEXT: divl %edx # sched: [25:10.00]
-; SANDY-NEXT: divl (%rax) # sched: [30:10.00]
-; SANDY-NEXT: divq %rcx # sched: [25:10.00]
-; SANDY-NEXT: divq (%r10) # sched: [30:10.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_div:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: divb %dil # sched: [22:1.00]
-; HASWELL-NEXT: divb (%r8) # sched: [29:10.00]
-; HASWELL-NEXT: divw %si # sched: [98:8.00]
-; HASWELL-NEXT: divw (%r9) # sched: [29:10.00]
-; HASWELL-NEXT: divl %edx # sched: [98:8.00]
-; HASWELL-NEXT: divl (%rax) # sched: [29:10.00]
-; HASWELL-NEXT: divq %rcx # sched: [98:8.00]
-; HASWELL-NEXT: divq (%r10) # sched: [29:10.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_div:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: divb %dil # sched: [25:10.00]
-; BROADWELL-NEXT: divb (%r8) # sched: [34:2.00]
-; BROADWELL-NEXT: divw %si # sched: [80:8.00]
-; BROADWELL-NEXT: divw (%r9) # sched: [34:2.00]
-; BROADWELL-NEXT: divl %edx # sched: [80:8.00]
-; BROADWELL-NEXT: divl (%rax) # sched: [34:2.00]
-; BROADWELL-NEXT: divq %rcx # sched: [80:8.00]
-; BROADWELL-NEXT: divq (%r10) # sched: [34:2.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_div:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: divb %dil # sched: [25:10.00]
-; SKYLAKE-NEXT: divb (%r8) # sched: [29:10.00]
-; SKYLAKE-NEXT: divw %si # sched: [76:8.00]
-; SKYLAKE-NEXT: divw (%r9) # sched: [29:10.00]
-; SKYLAKE-NEXT: divl %edx # sched: [76:8.00]
-; SKYLAKE-NEXT: divl (%rax) # sched: [29:10.00]
-; SKYLAKE-NEXT: divq %rcx # sched: [76:8.00]
-; SKYLAKE-NEXT: divq (%r10) # sched: [29:10.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_div:
-; SKX: # %bb.0:
-; SKX-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; SKX-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: divb %dil # sched: [25:10.00]
-; SKX-NEXT: divb (%r8) # sched: [29:10.00]
-; SKX-NEXT: divw %si # sched: [76:8.00]
-; SKX-NEXT: divw (%r9) # sched: [29:10.00]
-; SKX-NEXT: divl %edx # sched: [76:8.00]
-; SKX-NEXT: divl (%rax) # sched: [29:10.00]
-; SKX-NEXT: divq %rcx # sched: [76:8.00]
-; SKX-NEXT: divq (%r10) # sched: [29:10.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_div:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: divb %dil # sched: [12:12.00]
-; BDVER2-NEXT: divb (%r8) # sched: [16:12.00]
-; BDVER2-NEXT: divw %si # sched: [15:15.00]
-; BDVER2-NEXT: divw (%r9) # sched: [19:15.00]
-; BDVER2-NEXT: divl %edx # sched: [14:14.00]
-; BDVER2-NEXT: divl (%rax) # sched: [18:14.00]
-; BDVER2-NEXT: divq %rcx # sched: [14:14.00]
-; BDVER2-NEXT: divq (%r10) # sched: [18:14.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_div:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00]
-; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: divb %dil # sched: [12:12.00]
-; BTVER2-NEXT: divb (%r8) # sched: [15:12.00]
-; BTVER2-NEXT: divw %si # sched: [17:17.00]
-; BTVER2-NEXT: divw (%r9) # sched: [20:17.00]
-; BTVER2-NEXT: divl %edx # sched: [25:25.00]
-; BTVER2-NEXT: divl (%rax) # sched: [28:25.00]
-; BTVER2-NEXT: divq %rcx # sched: [41:41.00]
-; BTVER2-NEXT: divq (%r10) # sched: [44:41.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_div:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [8:0.50]
-; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: divb %dil # sched: [15:15.00]
-; ZNVER1-NEXT: divb (%r8) # sched: [19:15.00]
-; ZNVER1-NEXT: divw %si # sched: [17:17.00]
-; ZNVER1-NEXT: divw (%r9) # sched: [21:17.00]
-; ZNVER1-NEXT: divl %edx # sched: [25:25.00]
-; ZNVER1-NEXT: divl (%rax) # sched: [29:25.00]
-; ZNVER1-NEXT: divq %rcx # sched: [41:41.00]
-; ZNVER1-NEXT: divq (%r10) # sched: [45:41.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "divb $0 \0A\09 divb $4 \0A\09 divw $1 \0A\09 divw $5 \0A\09 divl $2 \0A\09 divl $6 \0A\09 divq $3 \0A\09 divq $7", "r,r,r,r,*m,*m,*m,*m"(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) nounwind
- ret void
-}
-
-define void @test_enter() optsize {
-; GENERIC-LABEL: test_enter:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: enter $7, $4095 # imm = 0xFFF
-; GENERIC-NEXT: # sched: [100:0.33]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_enter:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: enter $7, $4095 # imm = 0xFFF
-; ATOM-NEXT: # sched: [32:16.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_enter:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: enter $7, $4095 # imm = 0xFFF
-; SLM-NEXT: # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_enter:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: enter $7, $4095 # imm = 0xFFF
-; SANDY-NEXT: # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_enter:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: enter $7, $4095 # imm = 0xFFF
-; HASWELL-NEXT: # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_enter:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: enter $7, $4095 # imm = 0xFFF
-; BROADWELL-NEXT: # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_enter:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: enter $7, $4095 # imm = 0xFFF
-; SKYLAKE-NEXT: # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_enter:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: enter $7, $4095 # imm = 0xFFF
-; SKX-NEXT: # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_enter:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: enter $7, $4095 # imm = 0xFFF
-; BDVER2-NEXT: # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_enter:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: enter $7, $4095 # imm = 0xFFF
-; BTVER2-NEXT: # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_enter:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: enter $7, $4095 # imm = 0xFFF
-; ZNVER1-NEXT: # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "enter $0, $1", "i,i"(i8 7, i16 4095) nounwind
- ret void
-}
-
-define void @test_idiv(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) optsize {
-; GENERIC-LABEL: test_idiv:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: idivb %dil # sched: [25:10.00]
-; GENERIC-NEXT: idivb (%r8) # sched: [30:10.00]
-; GENERIC-NEXT: idivw %si # sched: [25:10.00]
-; GENERIC-NEXT: idivw (%r9) # sched: [30:10.00]
-; GENERIC-NEXT: idivl %edx # sched: [25:10.00]
-; GENERIC-NEXT: idivl (%rax) # sched: [30:10.00]
-; GENERIC-NEXT: idivq %rcx # sched: [25:10.00]
-; GENERIC-NEXT: idivq (%r10) # sched: [30:10.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_idiv:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [1:1.00]
-; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: idivb %dil # sched: [62:31.00]
-; ATOM-NEXT: idivb (%r8) # sched: [62:31.00]
-; ATOM-NEXT: idivw %si # sched: [62:31.00]
-; ATOM-NEXT: idivw (%r9) # sched: [62:31.00]
-; ATOM-NEXT: idivl %edx # sched: [62:31.00]
-; ATOM-NEXT: idivl (%rax) # sched: [62:31.00]
-; ATOM-NEXT: idivq %rcx # sched: [130:65.00]
-; ATOM-NEXT: idivq (%r10) # sched: [130:65.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_idiv:
-; SLM: # %bb.0:
-; SLM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00]
-; SLM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: idivb %dil # sched: [25:25.00]
-; SLM-NEXT: idivb (%r8) # sched: [29:25.00]
-; SLM-NEXT: idivw %si # sched: [25:25.00]
-; SLM-NEXT: idivw (%r9) # sched: [29:25.00]
-; SLM-NEXT: idivl %edx # sched: [25:25.00]
-; SLM-NEXT: idivl (%rax) # sched: [29:25.00]
-; SLM-NEXT: idivq %rcx # sched: [25:25.00]
-; SLM-NEXT: idivq (%r10) # sched: [29:25.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_idiv:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: idivb %dil # sched: [25:10.00]
-; SANDY-NEXT: idivb (%r8) # sched: [30:10.00]
-; SANDY-NEXT: idivw %si # sched: [25:10.00]
-; SANDY-NEXT: idivw (%r9) # sched: [30:10.00]
-; SANDY-NEXT: idivl %edx # sched: [25:10.00]
-; SANDY-NEXT: idivl (%rax) # sched: [30:10.00]
-; SANDY-NEXT: idivq %rcx # sched: [25:10.00]
-; SANDY-NEXT: idivq (%r10) # sched: [30:10.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_idiv:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: idivb %dil # sched: [23:1.00]
-; HASWELL-NEXT: idivb (%r8) # sched: [29:10.00]
-; HASWELL-NEXT: idivw %si # sched: [112:16.50]
-; HASWELL-NEXT: idivw (%r9) # sched: [29:10.00]
-; HASWELL-NEXT: idivl %edx # sched: [112:16.50]
-; HASWELL-NEXT: idivl (%rax) # sched: [29:10.00]
-; HASWELL-NEXT: idivq %rcx # sched: [112:16.50]
-; HASWELL-NEXT: idivq (%r10) # sched: [29:10.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_idiv:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: idivb %dil # sched: [25:10.00]
-; BROADWELL-NEXT: idivb (%r8) # sched: [35:2.00]
-; BROADWELL-NEXT: idivw %si # sched: [25:10.00]
-; BROADWELL-NEXT: idivw (%r9) # sched: [35:2.00]
-; BROADWELL-NEXT: idivl %edx # sched: [25:10.00]
-; BROADWELL-NEXT: idivl (%rax) # sched: [35:2.00]
-; BROADWELL-NEXT: idivq %rcx # sched: [25:10.00]
-; BROADWELL-NEXT: idivq (%r10) # sched: [35:2.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_idiv:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: idivb %dil # sched: [25:10.00]
-; SKYLAKE-NEXT: idivb (%r8) # sched: [28:4.00]
-; SKYLAKE-NEXT: idivw %si # sched: [102:16.50]
-; SKYLAKE-NEXT: idivw (%r9) # sched: [28:4.00]
-; SKYLAKE-NEXT: idivl %edx # sched: [102:16.50]
-; SKYLAKE-NEXT: idivl (%rax) # sched: [28:4.00]
-; SKYLAKE-NEXT: idivq %rcx # sched: [102:16.50]
-; SKYLAKE-NEXT: idivq (%r10) # sched: [28:4.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_idiv:
-; SKX: # %bb.0:
-; SKX-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; SKX-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: idivb %dil # sched: [25:10.00]
-; SKX-NEXT: idivb (%r8) # sched: [28:4.00]
-; SKX-NEXT: idivw %si # sched: [102:16.50]
-; SKX-NEXT: idivw (%r9) # sched: [28:4.00]
-; SKX-NEXT: idivl %edx # sched: [102:16.50]
-; SKX-NEXT: idivl (%rax) # sched: [28:4.00]
-; SKX-NEXT: idivq %rcx # sched: [102:16.50]
-; SKX-NEXT: idivq (%r10) # sched: [28:4.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_idiv:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: idivb %dil # sched: [12:12.00]
-; BDVER2-NEXT: idivb (%r8) # sched: [16:12.00]
-; BDVER2-NEXT: idivw %si # sched: [15:17.00]
-; BDVER2-NEXT: idivw (%r9) # sched: [19:17.00]
-; BDVER2-NEXT: idivl %edx # sched: [14:25.00]
-; BDVER2-NEXT: idivl (%rax) # sched: [18:25.00]
-; BDVER2-NEXT: idivq %rcx # sched: [14:14.00]
-; BDVER2-NEXT: idivq (%r10) # sched: [18:14.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_idiv:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00]
-; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: idivb %dil # sched: [12:12.00]
-; BTVER2-NEXT: idivb (%r8) # sched: [15:12.00]
-; BTVER2-NEXT: idivw %si # sched: [17:17.00]
-; BTVER2-NEXT: idivw (%r9) # sched: [20:17.00]
-; BTVER2-NEXT: idivl %edx # sched: [25:25.00]
-; BTVER2-NEXT: idivl (%rax) # sched: [28:25.00]
-; BTVER2-NEXT: idivq %rcx # sched: [41:41.00]
-; BTVER2-NEXT: idivq (%r10) # sched: [44:41.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_idiv:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [8:0.50]
-; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: idivb %dil # sched: [15:15.00]
-; ZNVER1-NEXT: idivb (%r8) # sched: [19:15.00]
-; ZNVER1-NEXT: idivw %si # sched: [17:17.00]
-; ZNVER1-NEXT: idivw (%r9) # sched: [21:17.00]
-; ZNVER1-NEXT: idivl %edx # sched: [25:25.00]
-; ZNVER1-NEXT: idivl (%rax) # sched: [29:25.00]
-; ZNVER1-NEXT: idivq %rcx # sched: [41:41.00]
-; ZNVER1-NEXT: idivq (%r10) # sched: [45:41.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "idivb $0 \0A\09 idivb $4 \0A\09 idivw $1 \0A\09 idivw $5 \0A\09 idivl $2 \0A\09 idivl $6 \0A\09 idivq $3 \0A\09 idivq $7", "r,r,r,r,*m,*m,*m,*m"(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) nounwind
- ret void
-}
-
-define void @test_imul_8(i8 %a0, i8* %a1) optsize {
-; GENERIC-LABEL: test_imul_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: imulb %dil # sched: [3:1.00]
-; GENERIC-NEXT: imulb (%rsi) # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_imul_8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: imulb %dil # sched: [7:3.50]
-; ATOM-NEXT: imulb (%rsi) # sched: [7:3.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_imul_8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: imulb %dil # sched: [3:1.00]
-; SLM-NEXT: imulb (%rsi) # sched: [6:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_imul_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: imulb %dil # sched: [3:1.00]
-; SANDY-NEXT: imulb (%rsi) # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_imul_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: imulb %dil # sched: [3:1.00]
-; HASWELL-NEXT: imulb (%rsi) # sched: [8:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_imul_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: imulb %dil # sched: [3:1.00]
-; BROADWELL-NEXT: imulb (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_imul_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: imulb %dil # sched: [3:1.00]
-; SKYLAKE-NEXT: imulb (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_imul_8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: imulb %dil # sched: [3:1.00]
-; SKX-NEXT: imulb (%rsi) # sched: [8:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_imul_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: imulb %dil # sched: [4:1.00]
-; BDVER2-NEXT: imulb (%rsi) # sched: [8:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_imul_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: imulb %dil # sched: [3:1.00]
-; BTVER2-NEXT: imulb (%rsi) # sched: [6:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_imul_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: imulb %dil # sched: [4:1.00]
-; ZNVER1-NEXT: imulb (%rsi) # sched: [8:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "imulb $0 \0A\09 imulb $1", "r,*m"(i8 %a0, i8* %a1) nounwind
- ret void
-}
-define void @test_imul_16(i16 %a0, i16* %a1, i16 %a2) optsize {
-; GENERIC-LABEL: test_imul_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: imulw %di # sched: [4:1.33]
-; GENERIC-NEXT: imulw (%rsi) # sched: [9:1.33]
-; GENERIC-NEXT: imulw %dx, %di # sched: [3:1.00]
-; GENERIC-NEXT: imulw (%rsi), %di # sched: [8:1.00]
-; GENERIC-NEXT: imulw $511, %di, %di # imm = 0x1FF
-; GENERIC-NEXT: # sched: [4:1.00]
-; GENERIC-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF
-; GENERIC-NEXT: # sched: [8:1.00]
-; GENERIC-NEXT: imulw $7, %di, %di # sched: [4:1.00]
-; GENERIC-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_imul_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: imulw %di # sched: [7:3.50]
-; ATOM-NEXT: imulw (%rsi) # sched: [8:4.00]
-; ATOM-NEXT: imulw %dx, %di # sched: [6:3.00]
-; ATOM-NEXT: imulw (%rsi), %di # sched: [7:3.50]
-; ATOM-NEXT: imulw $511, %di, %di # imm = 0x1FF
-; ATOM-NEXT: # sched: [6:3.00]
-; ATOM-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF
-; ATOM-NEXT: # sched: [7:3.50]
-; ATOM-NEXT: imulw $7, %di, %di # sched: [6:3.00]
-; ATOM-NEXT: imulw $7, (%rsi), %di # sched: [7:3.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_imul_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: imulw %di # sched: [3:1.00]
-; SLM-NEXT: imulw (%rsi) # sched: [6:1.00]
-; SLM-NEXT: imulw %dx, %di # sched: [3:1.00]
-; SLM-NEXT: imulw (%rsi), %di # sched: [6:1.00]
-; SLM-NEXT: imulw $511, %di, %di # imm = 0x1FF
-; SLM-NEXT: # sched: [3:1.00]
-; SLM-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF
-; SLM-NEXT: # sched: [6:1.00]
-; SLM-NEXT: imulw $7, %di, %di # sched: [3:1.00]
-; SLM-NEXT: imulw $7, (%rsi), %di # sched: [6:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_imul_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: imulw %di # sched: [4:1.33]
-; SANDY-NEXT: imulw (%rsi) # sched: [9:1.33]
-; SANDY-NEXT: imulw %dx, %di # sched: [3:1.00]
-; SANDY-NEXT: imulw (%rsi), %di # sched: [8:1.00]
-; SANDY-NEXT: imulw $511, %di, %di # imm = 0x1FF
-; SANDY-NEXT: # sched: [4:1.00]
-; SANDY-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF
-; SANDY-NEXT: # sched: [8:1.00]
-; SANDY-NEXT: imulw $7, %di, %di # sched: [4:1.00]
-; SANDY-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_imul_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: imulw %di # sched: [4:1.00]
-; HASWELL-NEXT: imulw (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: imulw %dx, %di # sched: [3:1.00]
-; HASWELL-NEXT: imulw (%rsi), %di # sched: [8:1.00]
-; HASWELL-NEXT: imulw $511, %di, %di # imm = 0x1FF
-; HASWELL-NEXT: # sched: [4:1.00]
-; HASWELL-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF
-; HASWELL-NEXT: # sched: [8:1.00]
-; HASWELL-NEXT: imulw $7, %di, %di # sched: [4:1.00]
-; HASWELL-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_imul_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: imulw %di # sched: [4:1.00]
-; BROADWELL-NEXT: imulw (%rsi) # sched: [9:1.00]
-; BROADWELL-NEXT: imulw %dx, %di # sched: [3:1.00]
-; BROADWELL-NEXT: imulw (%rsi), %di # sched: [8:1.00]
-; BROADWELL-NEXT: imulw $511, %di, %di # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [4:1.00]
-; BROADWELL-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [8:1.00]
-; BROADWELL-NEXT: imulw $7, %di, %di # sched: [4:1.00]
-; BROADWELL-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_imul_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: imulw %di # sched: [4:1.00]
-; SKYLAKE-NEXT: imulw (%rsi) # sched: [9:1.00]
-; SKYLAKE-NEXT: imulw %dx, %di # sched: [3:1.00]
-; SKYLAKE-NEXT: imulw (%rsi), %di # sched: [8:1.00]
-; SKYLAKE-NEXT: imulw $511, %di, %di # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [4:1.00]
-; SKYLAKE-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [8:1.00]
-; SKYLAKE-NEXT: imulw $7, %di, %di # sched: [4:1.00]
-; SKYLAKE-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_imul_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: imulw %di # sched: [4:1.00]
-; SKX-NEXT: imulw (%rsi) # sched: [9:1.00]
-; SKX-NEXT: imulw %dx, %di # sched: [3:1.00]
-; SKX-NEXT: imulw (%rsi), %di # sched: [8:1.00]
-; SKX-NEXT: imulw $511, %di, %di # imm = 0x1FF
-; SKX-NEXT: # sched: [4:1.00]
-; SKX-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF
-; SKX-NEXT: # sched: [8:1.00]
-; SKX-NEXT: imulw $7, %di, %di # sched: [4:1.00]
-; SKX-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_imul_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: imulw %di # sched: [4:1.00]
-; BDVER2-NEXT: imulw (%rsi) # sched: [8:1.00]
-; BDVER2-NEXT: imulw %dx, %di # sched: [4:1.00]
-; BDVER2-NEXT: imulw (%rsi), %di # sched: [8:1.00]
-; BDVER2-NEXT: imulw $511, %di, %di # imm = 0x1FF
-; BDVER2-NEXT: # sched: [5:1.00]
-; BDVER2-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF
-; BDVER2-NEXT: # sched: [9:1.00]
-; BDVER2-NEXT: imulw $7, %di, %di # sched: [5:1.00]
-; BDVER2-NEXT: imulw $7, (%rsi), %di # sched: [9:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_imul_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: imulw %di # sched: [3:1.00]
-; BTVER2-NEXT: imulw (%rsi) # sched: [6:1.00]
-; BTVER2-NEXT: imulw %dx, %di # sched: [3:1.00]
-; BTVER2-NEXT: imulw (%rsi), %di # sched: [6:1.00]
-; BTVER2-NEXT: imulw $511, %di, %di # imm = 0x1FF
-; BTVER2-NEXT: # sched: [3:1.00]
-; BTVER2-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF
-; BTVER2-NEXT: # sched: [6:1.00]
-; BTVER2-NEXT: imulw $7, %di, %di # sched: [3:1.00]
-; BTVER2-NEXT: imulw $7, (%rsi), %di # sched: [6:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_imul_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: imulw %di # sched: [3:1.00]
-; ZNVER1-NEXT: imulw (%rsi) # sched: [8:1.00]
-; ZNVER1-NEXT: imulw %dx, %di # sched: [3:1.00]
-; ZNVER1-NEXT: imulw (%rsi), %di # sched: [3:1.00]
-; ZNVER1-NEXT: imulw $511, %di, %di # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [3:1.00]
-; ZNVER1-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [3:1.00]
-; ZNVER1-NEXT: imulw $7, %di, %di # sched: [3:1.00]
-; ZNVER1-NEXT: imulw $7, (%rsi), %di # sched: [3:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "imulw $0 \0A\09 imulw $2 \0A\09 imulw $1, $0 \0A\09 imulw $2, $0 \0A\09 imulw $3, $0, $0 \0A\09 imulw $3, $2, $0 \0A\09 imulw $4, $0, $0 \0A\09 imulw $4, $2, $0", "r,r,*m,i,i"(i16 %a0, i16 %a2, i16* %a1, i16 511, i8 7) nounwind
- ret void
-}
-define void @test_imul_32(i32 %a0, i32* %a1, i32 %a2) optsize {
-; GENERIC-LABEL: test_imul_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: imull %edi # sched: [4:1.00]
-; GENERIC-NEXT: imull (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: imull %edx, %edi # sched: [3:1.00]
-; GENERIC-NEXT: imull (%rsi), %edi # sched: [8:1.00]
-; GENERIC-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [3:1.00]
-; GENERIC-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [8:1.00]
-; GENERIC-NEXT: imull $7, %edi, %edi # sched: [3:1.00]
-; GENERIC-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_imul_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: imull %edi # sched: [6:3.00]
-; ATOM-NEXT: imull (%rsi) # sched: [7:3.50]
-; ATOM-NEXT: imull %edx, %edi # sched: [5:5.00]
-; ATOM-NEXT: imull (%rsi), %edi # sched: [5:5.00]
-; ATOM-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [5:5.00]
-; ATOM-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [5:5.00]
-; ATOM-NEXT: imull $7, %edi, %edi # sched: [5:5.00]
-; ATOM-NEXT: imull $7, (%rsi), %edi # sched: [5:5.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_imul_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: imull %edi # sched: [3:1.00]
-; SLM-NEXT: imull (%rsi) # sched: [6:1.00]
-; SLM-NEXT: imull %edx, %edi # sched: [3:1.00]
-; SLM-NEXT: imull (%rsi), %edi # sched: [6:1.00]
-; SLM-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
-; SLM-NEXT: # sched: [3:1.00]
-; SLM-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0
-; SLM-NEXT: # sched: [6:1.00]
-; SLM-NEXT: imull $7, %edi, %edi # sched: [3:1.00]
-; SLM-NEXT: imull $7, (%rsi), %edi # sched: [6:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_imul_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: imull %edi # sched: [4:1.00]
-; SANDY-NEXT: imull (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: imull %edx, %edi # sched: [3:1.00]
-; SANDY-NEXT: imull (%rsi), %edi # sched: [8:1.00]
-; SANDY-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [3:1.00]
-; SANDY-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [8:1.00]
-; SANDY-NEXT: imull $7, %edi, %edi # sched: [3:1.00]
-; SANDY-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_imul_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: imull %edi # sched: [4:1.00]
-; HASWELL-NEXT: imull (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: imull %edx, %edi # sched: [3:1.00]
-; HASWELL-NEXT: imull (%rsi), %edi # sched: [8:1.00]
-; HASWELL-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [3:1.00]
-; HASWELL-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [8:1.00]
-; HASWELL-NEXT: imull $7, %edi, %edi # sched: [3:1.00]
-; HASWELL-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_imul_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: imull %edi # sched: [4:1.00]
-; BROADWELL-NEXT: imull (%rsi) # sched: [9:1.00]
-; BROADWELL-NEXT: imull %edx, %edi # sched: [3:1.00]
-; BROADWELL-NEXT: imull (%rsi), %edi # sched: [8:1.00]
-; BROADWELL-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [3:1.00]
-; BROADWELL-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [8:1.00]
-; BROADWELL-NEXT: imull $7, %edi, %edi # sched: [3:1.00]
-; BROADWELL-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_imul_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: imull %edi # sched: [4:1.00]
-; SKYLAKE-NEXT: imull (%rsi) # sched: [9:1.00]
-; SKYLAKE-NEXT: imull %edx, %edi # sched: [3:1.00]
-; SKYLAKE-NEXT: imull (%rsi), %edi # sched: [8:1.00]
-; SKYLAKE-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [3:1.00]
-; SKYLAKE-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [8:1.00]
-; SKYLAKE-NEXT: imull $7, %edi, %edi # sched: [3:1.00]
-; SKYLAKE-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_imul_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: imull %edi # sched: [4:1.00]
-; SKX-NEXT: imull (%rsi) # sched: [9:1.00]
-; SKX-NEXT: imull %edx, %edi # sched: [3:1.00]
-; SKX-NEXT: imull (%rsi), %edi # sched: [8:1.00]
-; SKX-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
-; SKX-NEXT: # sched: [3:1.00]
-; SKX-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0
-; SKX-NEXT: # sched: [8:1.00]
-; SKX-NEXT: imull $7, %edi, %edi # sched: [3:1.00]
-; SKX-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_imul_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: imull %edi # sched: [4:1.00]
-; BDVER2-NEXT: imull (%rsi) # sched: [8:1.00]
-; BDVER2-NEXT: imull %edx, %edi # sched: [4:1.00]
-; BDVER2-NEXT: imull (%rsi), %edi # sched: [8:1.00]
-; BDVER2-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [4:1.00]
-; BDVER2-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [8:1.00]
-; BDVER2-NEXT: imull $7, %edi, %edi # sched: [4:1.00]
-; BDVER2-NEXT: imull $7, (%rsi), %edi # sched: [8:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_imul_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: imull %edi # sched: [3:1.00]
-; BTVER2-NEXT: imull (%rsi) # sched: [6:1.00]
-; BTVER2-NEXT: imull %edx, %edi # sched: [3:1.00]
-; BTVER2-NEXT: imull (%rsi), %edi # sched: [6:1.00]
-; BTVER2-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [3:1.00]
-; BTVER2-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [6:1.00]
-; BTVER2-NEXT: imull $7, %edi, %edi # sched: [3:1.00]
-; BTVER2-NEXT: imull $7, (%rsi), %edi # sched: [6:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_imul_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: imull %edi # sched: [3:1.00]
-; ZNVER1-NEXT: imull (%rsi) # sched: [8:1.00]
-; ZNVER1-NEXT: imull %edx, %edi # sched: [3:1.00]
-; ZNVER1-NEXT: imull (%rsi), %edi # sched: [3:1.00]
-; ZNVER1-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [3:1.00]
-; ZNVER1-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [3:1.00]
-; ZNVER1-NEXT: imull $7, %edi, %edi # sched: [3:1.00]
-; ZNVER1-NEXT: imull $7, (%rsi), %edi # sched: [3:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "imull $0 \0A\09 imull $2 \0A\09 imull $1, $0 \0A\09 imull $2, $0 \0A\09 imull $3, $0, $0 \0A\09 imull $3, $2, $0 \0A\09 imull $4, $0, $0 \0A\09 imull $4, $2, $0", "r,r,*m,i,i"(i32 %a0, i32 %a2, i32* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-define void @test_imul_64(i64 %a0, i64* %a1, i64 %a2) optsize {
-; GENERIC-LABEL: test_imul_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: imulq %rdi # sched: [4:1.00]
-; GENERIC-NEXT: imulq (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: imulq %rdx, %rdi # sched: [3:1.00]
-; GENERIC-NEXT: imulq (%rsi), %rdi # sched: [8:1.00]
-; GENERIC-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [3:1.00]
-; GENERIC-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [8:1.00]
-; GENERIC-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00]
-; GENERIC-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_imul_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: imulq %rdi # sched: [12:6.00]
-; ATOM-NEXT: imulq (%rsi) # sched: [12:6.00]
-; ATOM-NEXT: imulq %rdx, %rdi # sched: [12:6.00]
-; ATOM-NEXT: imulq (%rsi), %rdi # sched: [12:6.00]
-; ATOM-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [14:7.00]
-; ATOM-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [14:7.00]
-; ATOM-NEXT: imulq $7, %rdi, %rdi # sched: [14:7.00]
-; ATOM-NEXT: imulq $7, (%rsi), %rdi # sched: [14:7.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_imul_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: imulq %rdi # sched: [3:1.00]
-; SLM-NEXT: imulq (%rsi) # sched: [6:1.00]
-; SLM-NEXT: imulq %rdx, %rdi # sched: [3:1.00]
-; SLM-NEXT: imulq (%rsi), %rdi # sched: [6:1.00]
-; SLM-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
-; SLM-NEXT: # sched: [3:1.00]
-; SLM-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0
-; SLM-NEXT: # sched: [6:1.00]
-; SLM-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00]
-; SLM-NEXT: imulq $7, (%rsi), %rdi # sched: [6:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_imul_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: imulq %rdi # sched: [4:1.00]
-; SANDY-NEXT: imulq (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: imulq %rdx, %rdi # sched: [3:1.00]
-; SANDY-NEXT: imulq (%rsi), %rdi # sched: [8:1.00]
-; SANDY-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [3:1.00]
-; SANDY-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [8:1.00]
-; SANDY-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00]
-; SANDY-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_imul_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: imulq %rdi # sched: [4:1.00]
-; HASWELL-NEXT: imulq (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: imulq %rdx, %rdi # sched: [3:1.00]
-; HASWELL-NEXT: imulq (%rsi), %rdi # sched: [8:1.00]
-; HASWELL-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [3:1.00]
-; HASWELL-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [8:1.00]
-; HASWELL-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00]
-; HASWELL-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_imul_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: imulq %rdi # sched: [4:1.00]
-; BROADWELL-NEXT: imulq (%rsi) # sched: [9:1.00]
-; BROADWELL-NEXT: imulq %rdx, %rdi # sched: [3:1.00]
-; BROADWELL-NEXT: imulq (%rsi), %rdi # sched: [8:1.00]
-; BROADWELL-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [3:1.00]
-; BROADWELL-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [8:1.00]
-; BROADWELL-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00]
-; BROADWELL-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_imul_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: imulq %rdi # sched: [4:1.00]
-; SKYLAKE-NEXT: imulq (%rsi) # sched: [9:1.00]
-; SKYLAKE-NEXT: imulq %rdx, %rdi # sched: [3:1.00]
-; SKYLAKE-NEXT: imulq (%rsi), %rdi # sched: [8:1.00]
-; SKYLAKE-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [3:1.00]
-; SKYLAKE-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [8:1.00]
-; SKYLAKE-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00]
-; SKYLAKE-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_imul_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: imulq %rdi # sched: [4:1.00]
-; SKX-NEXT: imulq (%rsi) # sched: [9:1.00]
-; SKX-NEXT: imulq %rdx, %rdi # sched: [3:1.00]
-; SKX-NEXT: imulq (%rsi), %rdi # sched: [8:1.00]
-; SKX-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
-; SKX-NEXT: # sched: [3:1.00]
-; SKX-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0
-; SKX-NEXT: # sched: [8:1.00]
-; SKX-NEXT: imulq $7, %rdi, %rdi # sched: [3:1.00]
-; SKX-NEXT: imulq $7, (%rsi), %rdi # sched: [8:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_imul_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: imulq %rdi # sched: [6:4.00]
-; BDVER2-NEXT: imulq (%rsi) # sched: [10:4.00]
-; BDVER2-NEXT: imulq %rdx, %rdi # sched: [6:4.00]
-; BDVER2-NEXT: imulq (%rsi), %rdi # sched: [10:4.00]
-; BDVER2-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [6:4.00]
-; BDVER2-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [10:4.00]
-; BDVER2-NEXT: imulq $7, %rdi, %rdi # sched: [6:4.00]
-; BDVER2-NEXT: imulq $7, (%rsi), %rdi # sched: [10:4.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_imul_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: imulq %rdi # sched: [6:4.00]
-; BTVER2-NEXT: imulq (%rsi) # sched: [9:4.00]
-; BTVER2-NEXT: imulq %rdx, %rdi # sched: [6:4.00]
-; BTVER2-NEXT: imulq (%rsi), %rdi # sched: [9:4.00]
-; BTVER2-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [6:4.00]
-; BTVER2-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [9:4.00]
-; BTVER2-NEXT: imulq $7, %rdi, %rdi # sched: [6:4.00]
-; BTVER2-NEXT: imulq $7, (%rsi), %rdi # sched: [9:4.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_imul_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: imulq %rdi # sched: [4:1.00]
-; ZNVER1-NEXT: imulq (%rsi) # sched: [9:1.00]
-; ZNVER1-NEXT: imulq %rdx, %rdi # sched: [4:1.00]
-; ZNVER1-NEXT: imulq (%rsi), %rdi # sched: [4:1.00]
-; ZNVER1-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [4:1.00]
-; ZNVER1-NEXT: imulq $665536, (%rsi), %rdi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [4:1.00]
-; ZNVER1-NEXT: imulq $7, %rdi, %rdi # sched: [4:1.00]
-; ZNVER1-NEXT: imulq $7, (%rsi), %rdi # sched: [4:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "imulq $0 \0A\09 imulq $2 \0A\09 imulq $1, $0 \0A\09 imulq $2, $0 \0A\09 imulq $3, $0, $0 \0A\09 imulq $3, $2, $0 \0A\09 imulq $4, $0, $0 \0A\09 imulq $4, $2, $0", "r,r,*m,i,i"(i64 %a0, i64 %a2, i64* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-
-define void @test_in() optsize {
-; GENERIC-LABEL: test_in:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: inb $7, %al # sched: [100:0.33]
-; GENERIC-NEXT: inw $7, %ax # sched: [100:0.33]
-; GENERIC-NEXT: inl $7, %eax # sched: [100:0.33]
-; GENERIC-NEXT: inb %dx, %al # sched: [100:0.33]
-; GENERIC-NEXT: inw %dx, %ax # sched: [100:0.33]
-; GENERIC-NEXT: inl %dx, %eax # sched: [100:0.33]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_in:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: inb $7, %al # sched: [92:46.00]
-; ATOM-NEXT: inw $7, %ax # sched: [92:46.00]
-; ATOM-NEXT: inl $7, %eax # sched: [92:46.00]
-; ATOM-NEXT: inb %dx, %al # sched: [94:47.00]
-; ATOM-NEXT: inw %dx, %ax # sched: [94:47.00]
-; ATOM-NEXT: inl %dx, %eax # sched: [94:47.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_in:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: inb $7, %al # sched: [100:1.00]
-; SLM-NEXT: inw $7, %ax # sched: [100:1.00]
-; SLM-NEXT: inl $7, %eax # sched: [100:1.00]
-; SLM-NEXT: inb %dx, %al # sched: [100:1.00]
-; SLM-NEXT: inw %dx, %ax # sched: [100:1.00]
-; SLM-NEXT: inl %dx, %eax # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_in:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: inb $7, %al # sched: [100:0.33]
-; SANDY-NEXT: inw $7, %ax # sched: [100:0.33]
-; SANDY-NEXT: inl $7, %eax # sched: [100:0.33]
-; SANDY-NEXT: inb %dx, %al # sched: [100:0.33]
-; SANDY-NEXT: inw %dx, %ax # sched: [100:0.33]
-; SANDY-NEXT: inl %dx, %eax # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_in:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: inb $7, %al # sched: [35:5.00]
-; HASWELL-NEXT: inw $7, %ax # sched: [35:5.00]
-; HASWELL-NEXT: inl $7, %eax # sched: [35:5.00]
-; HASWELL-NEXT: inb %dx, %al # sched: [35:5.00]
-; HASWELL-NEXT: inw %dx, %ax # sched: [35:5.00]
-; HASWELL-NEXT: inl %dx, %eax # sched: [35:5.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_in:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: inb $7, %al # sched: [34:5.00]
-; BROADWELL-NEXT: inw $7, %ax # sched: [34:5.00]
-; BROADWELL-NEXT: inl $7, %eax # sched: [34:5.00]
-; BROADWELL-NEXT: inb %dx, %al # sched: [34:5.00]
-; BROADWELL-NEXT: inw %dx, %ax # sched: [34:5.00]
-; BROADWELL-NEXT: inl %dx, %eax # sched: [34:5.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_in:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: inb $7, %al # sched: [35:5.00]
-; SKYLAKE-NEXT: inw $7, %ax # sched: [35:5.00]
-; SKYLAKE-NEXT: inl $7, %eax # sched: [35:5.00]
-; SKYLAKE-NEXT: inb %dx, %al # sched: [35:5.00]
-; SKYLAKE-NEXT: inw %dx, %ax # sched: [35:5.00]
-; SKYLAKE-NEXT: inl %dx, %eax # sched: [35:5.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_in:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: inb $7, %al # sched: [35:5.00]
-; SKX-NEXT: inw $7, %ax # sched: [35:5.00]
-; SKX-NEXT: inl $7, %eax # sched: [35:5.00]
-; SKX-NEXT: inb %dx, %al # sched: [35:5.00]
-; SKX-NEXT: inw %dx, %ax # sched: [35:5.00]
-; SKX-NEXT: inl %dx, %eax # sched: [35:5.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_in:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: inb $7, %al # sched: [100:0.50]
-; BDVER2-NEXT: inw $7, %ax # sched: [100:0.50]
-; BDVER2-NEXT: inl $7, %eax # sched: [100:0.50]
-; BDVER2-NEXT: inb %dx, %al # sched: [100:0.50]
-; BDVER2-NEXT: inw %dx, %ax # sched: [100:0.50]
-; BDVER2-NEXT: inl %dx, %eax # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_in:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: inb $7, %al # sched: [100:0.50]
-; BTVER2-NEXT: inw $7, %ax # sched: [100:0.50]
-; BTVER2-NEXT: inl $7, %eax # sched: [100:0.50]
-; BTVER2-NEXT: inb %dx, %al # sched: [100:0.50]
-; BTVER2-NEXT: inw %dx, %ax # sched: [100:0.50]
-; BTVER2-NEXT: inl %dx, %eax # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_in:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: inb $7, %al # sched: [100:0.25]
-; ZNVER1-NEXT: inw $7, %ax # sched: [100:0.25]
-; ZNVER1-NEXT: inl $7, %eax # sched: [100:0.25]
-; ZNVER1-NEXT: inb %dx, %al # sched: [100:0.25]
-; ZNVER1-NEXT: inw %dx, %ax # sched: [100:0.25]
-; ZNVER1-NEXT: inl %dx, %eax # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "inb $0, %AL \0A\09 inw $0, %AX \0A\09 inl $0, %EAX \0A\09 inb %DX, %AL \0A\09 inw %DX, %AX \0A\09 inl %DX, %EAX", "i"(i8 7) nounwind
- ret void
-}
-
-define void @test_inc8(i8 %a0, i8* %a1) optsize {
-; GENERIC-LABEL: test_inc8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: incb %dil # sched: [1:0.33]
-; GENERIC-NEXT: incb (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_inc8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: incb %dil # sched: [1:0.50]
-; ATOM-NEXT: incb (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_inc8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: incb %dil # sched: [1:0.50]
-; SLM-NEXT: incb (%rsi) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_inc8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: incb %dil # sched: [1:0.33]
-; SANDY-NEXT: incb (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_inc8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: incb %dil # sched: [1:0.25]
-; HASWELL-NEXT: incb (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_inc8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: incb %dil # sched: [1:0.25]
-; BROADWELL-NEXT: incb (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_inc8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: incb %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: incb (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_inc8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: incb %dil # sched: [1:0.25]
-; SKX-NEXT: incb (%rsi) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_inc8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: incb %dil # sched: [1:0.50]
-; BDVER2-NEXT: incb (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_inc8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: incb %dil # sched: [1:0.50]
-; BTVER2-NEXT: incb (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_inc8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: incb %dil # sched: [1:0.25]
-; ZNVER1-NEXT: incb (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "incb $0 \0A\09 incb $1", "r,*m"(i8 %a0, i8* %a1) nounwind
- ret void
-}
-define void @test_inc16(i16 %a0, i16* %a1) optsize {
-; GENERIC-LABEL: test_inc16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: incw %di # sched: [1:0.33]
-; GENERIC-NEXT: incw (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_inc16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: incw %di # sched: [1:0.50]
-; ATOM-NEXT: incw (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_inc16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: incw %di # sched: [1:0.50]
-; SLM-NEXT: incw (%rsi) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_inc16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: incw %di # sched: [1:0.33]
-; SANDY-NEXT: incw (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_inc16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: incw %di # sched: [1:0.25]
-; HASWELL-NEXT: incw (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_inc16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: incw %di # sched: [1:0.25]
-; BROADWELL-NEXT: incw (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_inc16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: incw %di # sched: [1:0.25]
-; SKYLAKE-NEXT: incw (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_inc16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: incw %di # sched: [1:0.25]
-; SKX-NEXT: incw (%rsi) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_inc16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: incw %di # sched: [1:0.50]
-; BDVER2-NEXT: incw (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_inc16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: incw %di # sched: [1:0.50]
-; BTVER2-NEXT: incw (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_inc16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: incw %di # sched: [1:0.25]
-; ZNVER1-NEXT: incw (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "incw $0 \0A\09 incw $1", "r,*m"(i16 %a0, i16* %a1) nounwind
- ret void
-}
-define void @test_inc32(i32 %a0, i32* %a1) optsize {
-; GENERIC-LABEL: test_inc32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: incl %edi # sched: [1:0.33]
-; GENERIC-NEXT: incl (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_inc32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: incl %edi # sched: [1:0.50]
-; ATOM-NEXT: incl (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_inc32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: incl %edi # sched: [1:0.50]
-; SLM-NEXT: incl (%rsi) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_inc32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: incl %edi # sched: [1:0.33]
-; SANDY-NEXT: incl (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_inc32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: incl %edi # sched: [1:0.25]
-; HASWELL-NEXT: incl (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_inc32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: incl %edi # sched: [1:0.25]
-; BROADWELL-NEXT: incl (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_inc32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: incl %edi # sched: [1:0.25]
-; SKYLAKE-NEXT: incl (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_inc32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: incl %edi # sched: [1:0.25]
-; SKX-NEXT: incl (%rsi) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_inc32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: incl %edi # sched: [1:0.50]
-; BDVER2-NEXT: incl (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_inc32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: incl %edi # sched: [1:0.50]
-; BTVER2-NEXT: incl (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_inc32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: incl %edi # sched: [1:0.25]
-; ZNVER1-NEXT: incl (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "incl $0 \0A\09 incl $1", "r,*m"(i32 %a0, i32* %a1) nounwind
- ret void
-}
-define void @test_inc64(i64 %a0, i64* %a1) optsize {
-; GENERIC-LABEL: test_inc64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: incq %rdi # sched: [1:0.33]
-; GENERIC-NEXT: incq (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_inc64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: incq %rdi # sched: [1:0.50]
-; ATOM-NEXT: incq (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_inc64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: incq %rdi # sched: [1:0.50]
-; SLM-NEXT: incq (%rsi) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_inc64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: incq %rdi # sched: [1:0.33]
-; SANDY-NEXT: incq (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_inc64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: incq %rdi # sched: [1:0.25]
-; HASWELL-NEXT: incq (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_inc64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: incq %rdi # sched: [1:0.25]
-; BROADWELL-NEXT: incq (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_inc64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: incq %rdi # sched: [1:0.25]
-; SKYLAKE-NEXT: incq (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_inc64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: incq %rdi # sched: [1:0.25]
-; SKX-NEXT: incq (%rsi) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_inc64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: incq %rdi # sched: [1:0.50]
-; BDVER2-NEXT: incq (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_inc64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: incq %rdi # sched: [1:0.50]
-; BTVER2-NEXT: incq (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_inc64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: incq %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: incq (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "incq $0 \0A\09 incq $1", "r,*m"(i64 %a0, i64* %a1) nounwind
- ret void
-}
-
-define void @test_ins() optsize {
-; GENERIC-LABEL: test_ins:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: insb %dx, %es:(%rdi) # sched: [100:0.33]
-; GENERIC-NEXT: insw %dx, %es:(%rdi) # sched: [100:0.33]
-; GENERIC-NEXT: insl %dx, %es:(%rdi) # sched: [100:0.33]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_ins:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: insb %dx, %es:(%rdi) # sched: [59:29.50]
-; ATOM-NEXT: insw %dx, %es:(%rdi) # sched: [59:29.50]
-; ATOM-NEXT: insl %dx, %es:(%rdi) # sched: [59:29.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_ins:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: insb %dx, %es:(%rdi) # sched: [100:1.00]
-; SLM-NEXT: insw %dx, %es:(%rdi) # sched: [100:1.00]
-; SLM-NEXT: insl %dx, %es:(%rdi) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_ins:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: insb %dx, %es:(%rdi) # sched: [100:0.33]
-; SANDY-NEXT: insw %dx, %es:(%rdi) # sched: [100:0.33]
-; SANDY-NEXT: insl %dx, %es:(%rdi) # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_ins:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: insb %dx, %es:(%rdi) # sched: [21:1.25]
-; HASWELL-NEXT: insw %dx, %es:(%rdi) # sched: [21:1.25]
-; HASWELL-NEXT: insl %dx, %es:(%rdi) # sched: [21:1.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ins:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: insb %dx, %es:(%rdi) # sched: [20:1.25]
-; BROADWELL-NEXT: insw %dx, %es:(%rdi) # sched: [20:1.25]
-; BROADWELL-NEXT: insl %dx, %es:(%rdi) # sched: [20:1.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ins:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: insb %dx, %es:(%rdi) # sched: [20:1.25]
-; SKYLAKE-NEXT: insw %dx, %es:(%rdi) # sched: [20:1.25]
-; SKYLAKE-NEXT: insl %dx, %es:(%rdi) # sched: [20:1.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_ins:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: insb %dx, %es:(%rdi) # sched: [20:1.25]
-; SKX-NEXT: insw %dx, %es:(%rdi) # sched: [20:1.25]
-; SKX-NEXT: insl %dx, %es:(%rdi) # sched: [20:1.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_ins:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: insb %dx, %es:(%rdi) # sched: [100:0.50]
-; BDVER2-NEXT: insw %dx, %es:(%rdi) # sched: [100:0.50]
-; BDVER2-NEXT: insl %dx, %es:(%rdi) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_ins:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: insb %dx, %es:(%rdi) # sched: [100:0.50]
-; BTVER2-NEXT: insw %dx, %es:(%rdi) # sched: [100:0.50]
-; BTVER2-NEXT: insl %dx, %es:(%rdi) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ins:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: insb %dx, %es:(%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: insw %dx, %es:(%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: insl %dx, %es:(%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "insb \0A\09 insw \0A\09 insl", ""()
- ret void
-}
-
-define void @test_int() optsize {
-; GENERIC-LABEL: test_int:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: int $7 # sched: [100:0.33]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_int:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: int $7 # sched: [127:63.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_int:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: int $7 # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_int:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: int $7 # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_int:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: int $7 # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_int:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: int $7 # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_int:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: int $7 # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_int:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: int $7 # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_int:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: int $7 # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_int:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: int $7 # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_int:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: int $7 # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "int $0", "i"(i8 7)
- ret void
-}
-
-define void @test_invlpg_invlpga(i8 *%a0) optsize {
-; GENERIC-LABEL: test_invlpg_invlpga:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: invlpg (%rdi) # sched: [100:0.33]
-; GENERIC-NEXT: invlpga %rax, %ecx # sched: [100:0.33]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_invlpg_invlpga:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: invlpg (%rdi) # sched: [71:35.50]
-; ATOM-NEXT: invlpga %rax, %ecx # sched: [71:35.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_invlpg_invlpga:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: invlpg (%rdi) # sched: [100:1.00]
-; SLM-NEXT: invlpga %rax, %ecx # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_invlpg_invlpga:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: invlpg (%rdi) # sched: [100:0.33]
-; SANDY-NEXT: invlpga %rax, %ecx # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_invlpg_invlpga:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: invlpg (%rdi) # sched: [100:0.25]
-; HASWELL-NEXT: invlpga %rax, %ecx # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_invlpg_invlpga:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: invlpg (%rdi) # sched: [100:0.25]
-; BROADWELL-NEXT: invlpga %rax, %ecx # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_invlpg_invlpga:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: invlpg (%rdi) # sched: [100:0.25]
-; SKYLAKE-NEXT: invlpga %rax, %ecx # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_invlpg_invlpga:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: invlpg (%rdi) # sched: [100:0.25]
-; SKX-NEXT: invlpga %rax, %ecx # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_invlpg_invlpga:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: invlpg (%rdi) # sched: [100:0.50]
-; BDVER2-NEXT: invlpga %rax, %ecx # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_invlpg_invlpga:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: invlpg (%rdi) # sched: [100:0.50]
-; BTVER2-NEXT: invlpga %rax, %ecx # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_invlpg_invlpga:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: invlpg (%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: invlpga %rax, %ecx # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm sideeffect "invlpg $0 \0A\09 invlpga %rax, %ecx", "*m"(i8 *%a0) nounwind
- ret void
-}
-
-define void @test_jcc() optsize {
-; GENERIC-LABEL: test_jcc:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: JCCTGT:
-; GENERIC-NEXT: jo JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jno JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jb JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jb JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jb JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jae JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jae JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jae JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: je JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: je JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jne JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jne JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jbe JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jbe JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: ja JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: ja JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: js JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jns JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jp JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jp JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jnp JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jnp JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jl JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jl JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jge JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jge JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jle JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jle JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jg JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: jg JCCTGT # sched: [1:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_jcc:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: JCCTGT:
-; ATOM-NEXT: jo JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jno JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jb JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jb JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jb JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jae JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jae JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jae JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: je JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: je JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jne JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jne JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jbe JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jbe JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: ja JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: ja JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: js JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jns JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jp JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jp JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jnp JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jnp JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jl JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jl JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jge JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jge JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jle JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jle JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jg JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: jg JCCTGT # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_jcc:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: JCCTGT:
-; SLM-NEXT: jo JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jno JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jb JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jb JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jb JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jae JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jae JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jae JCCTGT # sched: [1:1.00]
-; SLM-NEXT: je JCCTGT # sched: [1:1.00]
-; SLM-NEXT: je JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jne JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jne JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jbe JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jbe JCCTGT # sched: [1:1.00]
-; SLM-NEXT: ja JCCTGT # sched: [1:1.00]
-; SLM-NEXT: ja JCCTGT # sched: [1:1.00]
-; SLM-NEXT: js JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jns JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jp JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jp JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jnp JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jnp JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jl JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jl JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jge JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jge JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jle JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jle JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jg JCCTGT # sched: [1:1.00]
-; SLM-NEXT: jg JCCTGT # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_jcc:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: JCCTGT:
-; SANDY-NEXT: jo JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jno JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jb JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jb JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jb JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jae JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jae JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jae JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: je JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: je JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jne JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jne JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jbe JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jbe JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: ja JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: ja JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: js JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jns JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jp JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jp JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jnp JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jnp JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jl JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jl JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jge JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jge JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jle JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jle JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jg JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: jg JCCTGT # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_jcc:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: JCCTGT:
-; HASWELL-NEXT: jo JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jno JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jb JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jb JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jb JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jae JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jae JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jae JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: je JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: je JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jne JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jne JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jbe JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jbe JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: ja JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: ja JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: js JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jns JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jp JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jp JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jnp JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jnp JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jl JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jl JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jge JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jge JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jle JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jle JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jg JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: jg JCCTGT # sched: [1:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_jcc:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: JCCTGT:
-; BROADWELL-NEXT: jo JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jno JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jb JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jb JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jb JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jae JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jae JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jae JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: je JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: je JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jne JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jne JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jbe JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jbe JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: ja JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: ja JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: js JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jns JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jp JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jp JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jnp JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jnp JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jl JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jl JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jge JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jge JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jle JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jle JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jg JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: jg JCCTGT # sched: [1:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_jcc:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: JCCTGT:
-; SKYLAKE-NEXT: jo JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jno JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jb JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jb JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jb JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jae JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jae JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jae JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: je JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: je JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jne JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jne JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jbe JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jbe JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: ja JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: ja JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: js JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jns JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jp JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jp JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jnp JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jnp JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jl JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jl JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jge JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jge JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jle JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jle JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jg JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: jg JCCTGT # sched: [1:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_jcc:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: JCCTGT:
-; SKX-NEXT: jo JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jno JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jb JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jb JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jb JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jae JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jae JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jae JCCTGT # sched: [1:0.50]
-; SKX-NEXT: je JCCTGT # sched: [1:0.50]
-; SKX-NEXT: je JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jne JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jne JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jbe JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jbe JCCTGT # sched: [1:0.50]
-; SKX-NEXT: ja JCCTGT # sched: [1:0.50]
-; SKX-NEXT: ja JCCTGT # sched: [1:0.50]
-; SKX-NEXT: js JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jns JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jp JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jp JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jnp JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jnp JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jl JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jl JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jge JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jge JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jle JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jle JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jg JCCTGT # sched: [1:0.50]
-; SKX-NEXT: jg JCCTGT # sched: [1:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_jcc:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: JCCTGT:
-; BDVER2-NEXT: jo JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jno JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jb JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jb JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jb JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jae JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jae JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jae JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: je JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: je JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jne JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jne JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jbe JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jbe JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: ja JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: ja JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: js JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jns JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jp JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jp JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jnp JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jnp JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jl JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jl JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jge JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jge JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jle JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jle JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jg JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: jg JCCTGT # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_jcc:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: JCCTGT:
-; BTVER2-NEXT: jo JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jno JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jb JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jb JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jb JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jae JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jae JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jae JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: je JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: je JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jne JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jne JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jbe JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jbe JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: ja JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: ja JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: js JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jns JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jp JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jp JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jnp JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jnp JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jl JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jl JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jge JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jge JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jle JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jle JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jg JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: jg JCCTGT # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_jcc:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: JCCTGT:
-; ZNVER1-NEXT: jo JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jno JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jb JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jb JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jb JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jae JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jae JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jae JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: je JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: je JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jne JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jne JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jbe JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jbe JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: ja JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: ja JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: js JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jns JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jp JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jp JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jnp JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jnp JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jl JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jl JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jge JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jge JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jle JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jle JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jg JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: jg JCCTGT # sched: [1:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "JCCTGT: \0A\09 jo JCCTGT \0A\09 jno JCCTGT \0A\09 jb JCCTGT \0A\09 jc JCCTGT \0A\09 jnae JCCTGT \0A\09 jnb JCCTGT \0A\09 jnc JCCTGT \0A\09 jae JCCTGT \0A\09 jz JCCTGT \0A\09 je JCCTGT \0A\09 jnz JCCTGT \0A\09 jne JCCTGT \0A\09 jbe JCCTGT \0A\09 jna JCCTGT \0A\09 jnbe JCCTGT \0A\09 ja JCCTGT \0A\09 js JCCTGT \0A\09 jns JCCTGT \0A\09 jp JCCTGT \0A\09 jpe JCCTGT \0A\09 jnp JCCTGT \0A\09 jpo JCCTGT \0A\09 jl JCCTGT \0A\09 jnge JCCTGT \0A\09 jnl JCCTGT \0A\09 jge JCCTGT \0A\09 jle JCCTGT \0A\09 jng JCCTGT \0A\09 jnle JCCTGT \0A\09 jg JCCTGT", ""()
- ret void
-}
-
-define void @test_jecxz_jrcxz() optsize {
-; GENERIC-LABEL: test_jecxz_jrcxz:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: JXTGT:
-; GENERIC-NEXT: jecxz JXTGT # sched: [2:1.00]
-; GENERIC-NEXT: jrcxz JXTGT # sched: [2:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_jecxz_jrcxz:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: JXTGT:
-; ATOM-NEXT: jecxz JXTGT # sched: [4:2.00]
-; ATOM-NEXT: jrcxz JXTGT # sched: [4:2.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_jecxz_jrcxz:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: JXTGT:
-; SLM-NEXT: jecxz JXTGT # sched: [1:1.00]
-; SLM-NEXT: jrcxz JXTGT # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_jecxz_jrcxz:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: JXTGT:
-; SANDY-NEXT: jecxz JXTGT # sched: [2:1.00]
-; SANDY-NEXT: jrcxz JXTGT # sched: [2:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_jecxz_jrcxz:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: JXTGT:
-; HASWELL-NEXT: jecxz JXTGT # sched: [2:0.50]
-; HASWELL-NEXT: jrcxz JXTGT # sched: [2:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_jecxz_jrcxz:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: JXTGT:
-; BROADWELL-NEXT: jecxz JXTGT # sched: [2:0.50]
-; BROADWELL-NEXT: jrcxz JXTGT # sched: [2:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_jecxz_jrcxz:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: JXTGT:
-; SKYLAKE-NEXT: jecxz JXTGT # sched: [2:0.50]
-; SKYLAKE-NEXT: jrcxz JXTGT # sched: [2:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_jecxz_jrcxz:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: JXTGT:
-; SKX-NEXT: jecxz JXTGT # sched: [2:0.50]
-; SKX-NEXT: jrcxz JXTGT # sched: [2:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_jecxz_jrcxz:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: JXTGT:
-; BDVER2-NEXT: jecxz JXTGT # sched: [1:1.00]
-; BDVER2-NEXT: jrcxz JXTGT # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_jecxz_jrcxz:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: JXTGT:
-; BTVER2-NEXT: jecxz JXTGT # sched: [1:0.50]
-; BTVER2-NEXT: jrcxz JXTGT # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_jecxz_jrcxz:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: JXTGT:
-; ZNVER1-NEXT: jecxz JXTGT # sched: [1:0.50]
-; ZNVER1-NEXT: jrcxz JXTGT # sched: [1:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "JXTGT: \0A\09 jecxz JXTGT \0A\09 jrcxz JXTGT", ""()
- ret void
-}
-
-; TODO - test_jmp
-
-define void @test_lahf_sahf() optsize {
-; GENERIC-LABEL: test_lahf_sahf:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: lahf # sched: [1:0.50]
-; GENERIC-NEXT: sahf # sched: [1:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lahf_sahf:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: lahf # sched: [2:1.00]
-; ATOM-NEXT: sahf # sched: [2:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lahf_sahf:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: lahf # sched: [1:0.50]
-; SLM-NEXT: sahf # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lahf_sahf:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: lahf # sched: [1:0.50]
-; SANDY-NEXT: sahf # sched: [1:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lahf_sahf:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: lahf # sched: [1:0.50]
-; HASWELL-NEXT: sahf # sched: [1:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lahf_sahf:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: lahf # sched: [1:0.50]
-; BROADWELL-NEXT: sahf # sched: [1:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lahf_sahf:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: lahf # sched: [1:0.50]
-; SKYLAKE-NEXT: sahf # sched: [1:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_lahf_sahf:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: lahf # sched: [1:0.50]
-; SKX-NEXT: sahf # sched: [1:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lahf_sahf:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: lahf # sched: [2:0.50]
-; BDVER2-NEXT: sahf # sched: [2:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lahf_sahf:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: lahf # sched: [1:0.50]
-; BTVER2-NEXT: sahf # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lahf_sahf:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: lahf # sched: [100:0.25]
-; ZNVER1-NEXT: sahf # sched: [2:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "lahf \0A\09 sahf", ""() nounwind
- ret void
-}
-
-; TODO - test_lds
-; TODO - test_les
-; TODO - test_lfs
-; TODO - test_lgs
-; TODO - test_lss
-
-; TODO - test_lea
-
-define void @test_leave() optsize {
-; GENERIC-LABEL: test_leave:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: leave # sched: [7:0.67]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_leave:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: leave # sched: [2:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_leave:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: leave # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_leave:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: leave # sched: [7:0.67]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_leave:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: leave # sched: [7:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_leave:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: leave # sched: [7:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_leave:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: leave # sched: [7:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_leave:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: leave # sched: [7:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_leave:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: leave # sched: [1:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_leave:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: leave # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_leave:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: leave # sched: [8:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "leave", ""() nounwind
- ret void
-}
-
-define void @test_lods() optsize {
-; GENERIC-LABEL: test_lods:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: lodsb (%rsi), %al # sched: [7:0.67]
-; GENERIC-NEXT: lodsw (%rsi), %ax # sched: [7:0.67]
-; GENERIC-NEXT: lodsl (%rsi), %eax # sched: [6:0.50]
-; GENERIC-NEXT: lodsq (%rsi), %rax # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lods:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: lodsb (%rsi), %al # sched: [2:1.00]
-; ATOM-NEXT: lodsw (%rsi), %ax # sched: [2:1.00]
-; ATOM-NEXT: lodsl (%rsi), %eax # sched: [2:1.00]
-; ATOM-NEXT: lodsq (%rsi), %rax # sched: [2:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lods:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: lodsb (%rsi), %al # sched: [100:1.00]
-; SLM-NEXT: lodsw (%rsi), %ax # sched: [100:1.00]
-; SLM-NEXT: lodsl (%rsi), %eax # sched: [100:1.00]
-; SLM-NEXT: lodsq (%rsi), %rax # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_lods:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: lodsb (%rsi), %al # sched: [7:0.67]
-; SANDY-NEXT: lodsw (%rsi), %ax # sched: [7:0.67]
-; SANDY-NEXT: lodsl (%rsi), %eax # sched: [6:0.50]
-; SANDY-NEXT: lodsq (%rsi), %rax # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_lods:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: lodsb (%rsi), %al # sched: [1:0.50]
-; HASWELL-NEXT: lodsw (%rsi), %ax # sched: [1:0.50]
-; HASWELL-NEXT: lodsl (%rsi), %eax # sched: [1:0.50]
-; HASWELL-NEXT: lodsq (%rsi), %rax # sched: [1:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lods:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: lodsb (%rsi), %al # sched: [100:0.25]
-; BROADWELL-NEXT: lodsw (%rsi), %ax # sched: [100:0.25]
-; BROADWELL-NEXT: lodsl (%rsi), %eax # sched: [100:0.25]
-; BROADWELL-NEXT: lodsq (%rsi), %rax # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lods:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: lodsb (%rsi), %al # sched: [100:0.25]
-; SKYLAKE-NEXT: lodsw (%rsi), %ax # sched: [100:0.25]
-; SKYLAKE-NEXT: lodsl (%rsi), %eax # sched: [100:0.25]
-; SKYLAKE-NEXT: lodsq (%rsi), %rax # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_lods:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: lodsb (%rsi), %al # sched: [100:0.25]
-; SKX-NEXT: lodsw (%rsi), %ax # sched: [100:0.25]
-; SKX-NEXT: lodsl (%rsi), %eax # sched: [100:0.25]
-; SKX-NEXT: lodsq (%rsi), %rax # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_lods:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: lodsb (%rsi), %al # sched: [100:0.50]
-; BDVER2-NEXT: lodsw (%rsi), %ax # sched: [100:0.50]
-; BDVER2-NEXT: lodsl (%rsi), %eax # sched: [100:0.50]
-; BDVER2-NEXT: lodsq (%rsi), %rax # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_lods:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: lodsb (%rsi), %al # sched: [100:0.50]
-; BTVER2-NEXT: lodsw (%rsi), %ax # sched: [100:0.50]
-; BTVER2-NEXT: lodsl (%rsi), %eax # sched: [100:0.50]
-; BTVER2-NEXT: lodsq (%rsi), %rax # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_lods:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: lodsb (%rsi), %al # sched: [100:0.25]
-; ZNVER1-NEXT: lodsw (%rsi), %ax # sched: [100:0.25]
-; ZNVER1-NEXT: lodsl (%rsi), %eax # sched: [100:0.25]
-; ZNVER1-NEXT: lodsq (%rsi), %rax # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "lodsb \0A\09 lodsw \0A\09 lodsl \0A\09 lodsq", ""()
- ret void
-}
-
-define void @test_loop() optsize {
-; GENERIC-LABEL: test_loop:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: LTGT:
-; GENERIC-NEXT: loop LTGT # sched: [1:1.00]
-; GENERIC-NEXT: loope LTGT # sched: [1:1.00]
-; GENERIC-NEXT: loopne LTGT # sched: [1:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_loop:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: LTGT:
-; ATOM-NEXT: loop LTGT # sched: [18:9.00]
-; ATOM-NEXT: loope LTGT # sched: [8:4.00]
-; ATOM-NEXT: loopne LTGT # sched: [17:8.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_loop:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: LTGT:
-; SLM-NEXT: loop LTGT # sched: [1:1.00]
-; SLM-NEXT: loope LTGT # sched: [1:1.00]
-; SLM-NEXT: loopne LTGT # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_loop:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: LTGT:
-; SANDY-NEXT: loop LTGT # sched: [1:1.00]
-; SANDY-NEXT: loope LTGT # sched: [1:1.00]
-; SANDY-NEXT: loopne LTGT # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_loop:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: LTGT:
-; HASWELL-NEXT: loop LTGT # sched: [7:2.00]
-; HASWELL-NEXT: loope LTGT # sched: [11:2.75]
-; HASWELL-NEXT: loopne LTGT # sched: [11:2.75]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_loop:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: LTGT:
-; BROADWELL-NEXT: loop LTGT # sched: [7:2.00]
-; BROADWELL-NEXT: loope LTGT # sched: [11:2.75]
-; BROADWELL-NEXT: loopne LTGT # sched: [11:2.75]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_loop:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: LTGT:
-; SKYLAKE-NEXT: loop LTGT # sched: [7:2.00]
-; SKYLAKE-NEXT: loope LTGT # sched: [11:2.75]
-; SKYLAKE-NEXT: loopne LTGT # sched: [11:2.75]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_loop:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: LTGT:
-; SKX-NEXT: loop LTGT # sched: [7:2.00]
-; SKX-NEXT: loope LTGT # sched: [11:2.75]
-; SKX-NEXT: loopne LTGT # sched: [11:2.75]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_loop:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: LTGT:
-; BDVER2-NEXT: loop LTGT # sched: [1:1.00]
-; BDVER2-NEXT: loope LTGT # sched: [1:1.00]
-; BDVER2-NEXT: loopne LTGT # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_loop:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: LTGT:
-; BTVER2-NEXT: loop LTGT # sched: [1:0.50]
-; BTVER2-NEXT: loope LTGT # sched: [1:0.50]
-; BTVER2-NEXT: loopne LTGT # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_loop:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: LTGT:
-; ZNVER1-NEXT: loop LTGT # sched: [1:0.50]
-; ZNVER1-NEXT: loope LTGT # sched: [1:0.50]
-; ZNVER1-NEXT: loopne LTGT # sched: [1:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "LTGT: \0A\09 loop LTGT \0A\09 loope LTGT \0A\09 loopne LTGT", ""()
- ret void
-}
-
-; TODO - test_mov
-
-define void @test_movnti(i32 %a0, i32 *%a1, i64 %a2, i64 *%a3) optsize {
-; GENERIC-LABEL: test_movnti:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: movntil %edi, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movnti:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: movntil %edi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movnti:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: movntil %edi, (%rsi) # sched: [1:1.00]
-; SLM-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_movnti:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: movntil %edi, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movnti:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: movntil %edi, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movnti:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: movntil %edi, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movnti:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: movntil %edi, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movnti:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: movntil %edi, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movnti:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: movntil %edi, (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movnti:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: movntil %edi, (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movnti:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: movntil %edi, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: movntiq %rdx, (%rcx) # sched: [1:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "movnti $0, $1 \0A\09 movnti $2, $3", "r,*m,r,*m"(i32 %a0, i32 *%a1, i64 %a2, i64 *%a3)
- ret void
-}
-
-define void @test_movs() optsize {
-; GENERIC-LABEL: test_movs:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: movsb (%rsi), %es:(%rdi) # sched: [8:1.00]
-; GENERIC-NEXT: movsw (%rsi), %es:(%rdi) # sched: [8:1.00]
-; GENERIC-NEXT: movsl (%rsi), %es:(%rdi) # sched: [8:1.00]
-; GENERIC-NEXT: movsq (%rsi), %es:(%rdi) # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movs:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: movsb (%rsi), %es:(%rdi) # sched: [3:1.50]
-; ATOM-NEXT: movsw (%rsi), %es:(%rdi) # sched: [3:1.50]
-; ATOM-NEXT: movsl (%rsi), %es:(%rdi) # sched: [3:1.50]
-; ATOM-NEXT: movsq (%rsi), %es:(%rdi) # sched: [3:1.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movs:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: movsb (%rsi), %es:(%rdi) # sched: [100:1.00]
-; SLM-NEXT: movsw (%rsi), %es:(%rdi) # sched: [100:1.00]
-; SLM-NEXT: movsl (%rsi), %es:(%rdi) # sched: [100:1.00]
-; SLM-NEXT: movsq (%rsi), %es:(%rdi) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_movs:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: movsb (%rsi), %es:(%rdi) # sched: [8:1.00]
-; SANDY-NEXT: movsw (%rsi), %es:(%rdi) # sched: [8:1.00]
-; SANDY-NEXT: movsl (%rsi), %es:(%rdi) # sched: [8:1.00]
-; SANDY-NEXT: movsq (%rsi), %es:(%rdi) # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movs:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: movsb (%rsi), %es:(%rdi) # sched: [4:1.00]
-; HASWELL-NEXT: movsw (%rsi), %es:(%rdi) # sched: [4:1.00]
-; HASWELL-NEXT: movsl (%rsi), %es:(%rdi) # sched: [4:1.00]
-; HASWELL-NEXT: movsq (%rsi), %es:(%rdi) # sched: [4:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movs:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: movsb (%rsi), %es:(%rdi) # sched: [100:0.25]
-; BROADWELL-NEXT: movsw (%rsi), %es:(%rdi) # sched: [100:0.25]
-; BROADWELL-NEXT: movsl (%rsi), %es:(%rdi) # sched: [100:0.25]
-; BROADWELL-NEXT: movsq (%rsi), %es:(%rdi) # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movs:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: movsb (%rsi), %es:(%rdi) # sched: [100:0.25]
-; SKYLAKE-NEXT: movsw (%rsi), %es:(%rdi) # sched: [100:0.25]
-; SKYLAKE-NEXT: movsl (%rsi), %es:(%rdi) # sched: [100:0.25]
-; SKYLAKE-NEXT: movsq (%rsi), %es:(%rdi) # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movs:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: movsb (%rsi), %es:(%rdi) # sched: [100:0.25]
-; SKX-NEXT: movsw (%rsi), %es:(%rdi) # sched: [100:0.25]
-; SKX-NEXT: movsl (%rsi), %es:(%rdi) # sched: [100:0.25]
-; SKX-NEXT: movsq (%rsi), %es:(%rdi) # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movs:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: movsb (%rsi), %es:(%rdi) # sched: [100:0.50]
-; BDVER2-NEXT: movsw (%rsi), %es:(%rdi) # sched: [100:0.50]
-; BDVER2-NEXT: movsl (%rsi), %es:(%rdi) # sched: [100:0.50]
-; BDVER2-NEXT: movsq (%rsi), %es:(%rdi) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movs:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: movsb (%rsi), %es:(%rdi) # sched: [100:0.50]
-; BTVER2-NEXT: movsw (%rsi), %es:(%rdi) # sched: [100:0.50]
-; BTVER2-NEXT: movsl (%rsi), %es:(%rdi) # sched: [100:0.50]
-; BTVER2-NEXT: movsq (%rsi), %es:(%rdi) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movs:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: movsb (%rsi), %es:(%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: movsw (%rsi), %es:(%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: movsl (%rsi), %es:(%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: movsq (%rsi), %es:(%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "movsb \0A\09 movsw \0A\09 movsl \0A\09 movsq", ""()
- ret void
-}
-
-; TODO - test_movsx
-; TODO - test_movzx
-
-define i64 @test_movslq(i32 %a0, i32 *%a1) optsize {
-; GENERIC-LABEL: test_movslq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: movslq %edi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: movslq (%rsi), %rcx # sched: [5:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movslq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: movslq %edi, %rax # sched: [1:1.00]
-; ATOM-NEXT: movslq (%rsi), %rcx # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movslq:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: movslq %edi, %rax # sched: [1:0.50]
-; SLM-NEXT: movslq (%rsi), %rcx # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_movslq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: movslq %edi, %rax # sched: [1:0.33]
-; SANDY-NEXT: movslq (%rsi), %rcx # sched: [5:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: orq %rcx, %rax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_movslq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: movslq %edi, %rax # sched: [1:0.25]
-; HASWELL-NEXT: movslq (%rsi), %rcx # sched: [5:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movslq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: movslq %edi, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: movslq (%rsi), %rcx # sched: [5:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movslq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: movslq %edi, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: movslq (%rsi), %rcx # sched: [5:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movslq:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: movslq %edi, %rax # sched: [1:0.25]
-; SKX-NEXT: movslq (%rsi), %rcx # sched: [5:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_movslq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: movslq %edi, %rax # sched: [1:0.50]
-; BDVER2-NEXT: movslq (%rsi), %rcx # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movslq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: movslq %edi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: movslq (%rsi), %rcx # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: orq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movslq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: movslq %edi, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: movslq (%rsi), %rcx # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: orq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call { i64, i64 } asm sideeffect "movslq $2, $0 \0A\09 movslq $3, $1", "=r,=r,r,*m"(i32 %a0, i32 *%a1)
- %2 = extractvalue { i64, i64 } %1, 0
- %3 = extractvalue { i64, i64 } %1, 1
- %4 = or i64 %2, %3
- ret i64 %4
-}
-
-define void @test_mul(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) optsize {
-; GENERIC-LABEL: test_mul:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: mulb %dil # sched: [3:1.00]
-; GENERIC-NEXT: mulb (%r8) # sched: [8:1.00]
-; GENERIC-NEXT: mulw %si # sched: [4:1.33]
-; GENERIC-NEXT: mulw (%r9) # sched: [9:1.33]
-; GENERIC-NEXT: mull %edx # sched: [4:1.00]
-; GENERIC-NEXT: mull (%rax) # sched: [9:1.00]
-; GENERIC-NEXT: mulq %rcx # sched: [4:1.00]
-; GENERIC-NEXT: mulq (%r10) # sched: [9:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_mul:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [1:1.00]
-; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: mulb %dil # sched: [7:3.50]
-; ATOM-NEXT: mulb (%r8) # sched: [7:3.50]
-; ATOM-NEXT: mulw %si # sched: [7:3.50]
-; ATOM-NEXT: mulw (%r9) # sched: [8:4.00]
-; ATOM-NEXT: mull %edx # sched: [6:3.00]
-; ATOM-NEXT: mull (%rax) # sched: [7:3.50]
-; ATOM-NEXT: mulq %rcx # sched: [12:6.00]
-; ATOM-NEXT: mulq (%r10) # sched: [12:6.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_mul:
-; SLM: # %bb.0:
-; SLM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00]
-; SLM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: mulb %dil # sched: [3:1.00]
-; SLM-NEXT: mulb (%r8) # sched: [6:1.00]
-; SLM-NEXT: mulw %si # sched: [3:1.00]
-; SLM-NEXT: mulw (%r9) # sched: [6:1.00]
-; SLM-NEXT: mull %edx # sched: [3:1.00]
-; SLM-NEXT: mull (%rax) # sched: [6:1.00]
-; SLM-NEXT: mulq %rcx # sched: [3:1.00]
-; SLM-NEXT: mulq (%r10) # sched: [6:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_mul:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: mulb %dil # sched: [3:1.00]
-; SANDY-NEXT: mulb (%r8) # sched: [8:1.00]
-; SANDY-NEXT: mulw %si # sched: [4:1.33]
-; SANDY-NEXT: mulw (%r9) # sched: [9:1.33]
-; SANDY-NEXT: mull %edx # sched: [4:1.00]
-; SANDY-NEXT: mull (%rax) # sched: [9:1.00]
-; SANDY-NEXT: mulq %rcx # sched: [4:1.00]
-; SANDY-NEXT: mulq (%r10) # sched: [9:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_mul:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: mulb %dil # sched: [3:1.00]
-; HASWELL-NEXT: mulb (%r8) # sched: [8:1.00]
-; HASWELL-NEXT: mulw %si # sched: [4:1.00]
-; HASWELL-NEXT: mulw (%r9) # sched: [9:1.00]
-; HASWELL-NEXT: mull %edx # sched: [4:1.00]
-; HASWELL-NEXT: mull (%rax) # sched: [9:1.00]
-; HASWELL-NEXT: mulq %rcx # sched: [4:1.00]
-; HASWELL-NEXT: mulq (%r10) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mul:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: mulb %dil # sched: [3:1.00]
-; BROADWELL-NEXT: mulb (%r8) # sched: [8:1.00]
-; BROADWELL-NEXT: mulw %si # sched: [4:1.00]
-; BROADWELL-NEXT: mulw (%r9) # sched: [9:1.00]
-; BROADWELL-NEXT: mull %edx # sched: [4:1.00]
-; BROADWELL-NEXT: mull (%rax) # sched: [9:1.00]
-; BROADWELL-NEXT: mulq %rcx # sched: [4:1.00]
-; BROADWELL-NEXT: mulq (%r10) # sched: [9:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mul:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: mulb %dil # sched: [3:1.00]
-; SKYLAKE-NEXT: mulb (%r8) # sched: [8:1.00]
-; SKYLAKE-NEXT: mulw %si # sched: [4:1.00]
-; SKYLAKE-NEXT: mulw (%r9) # sched: [9:1.00]
-; SKYLAKE-NEXT: mull %edx # sched: [4:1.00]
-; SKYLAKE-NEXT: mull (%rax) # sched: [9:1.00]
-; SKYLAKE-NEXT: mulq %rcx # sched: [4:1.00]
-; SKYLAKE-NEXT: mulq (%r10) # sched: [9:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_mul:
-; SKX: # %bb.0:
-; SKX-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; SKX-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: mulb %dil # sched: [3:1.00]
-; SKX-NEXT: mulb (%r8) # sched: [8:1.00]
-; SKX-NEXT: mulw %si # sched: [4:1.00]
-; SKX-NEXT: mulw (%r9) # sched: [9:1.00]
-; SKX-NEXT: mull %edx # sched: [4:1.00]
-; SKX-NEXT: mull (%rax) # sched: [9:1.00]
-; SKX-NEXT: mulq %rcx # sched: [4:1.00]
-; SKX-NEXT: mulq (%r10) # sched: [9:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_mul:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: mulb %dil # sched: [4:1.00]
-; BDVER2-NEXT: mulb (%r8) # sched: [8:1.00]
-; BDVER2-NEXT: mulw %si # sched: [4:1.00]
-; BDVER2-NEXT: mulw (%r9) # sched: [8:1.00]
-; BDVER2-NEXT: mull %edx # sched: [4:1.00]
-; BDVER2-NEXT: mull (%rax) # sched: [8:1.00]
-; BDVER2-NEXT: mulq %rcx # sched: [6:4.00]
-; BDVER2-NEXT: mulq (%r10) # sched: [10:4.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_mul:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00]
-; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: mulb %dil # sched: [3:1.00]
-; BTVER2-NEXT: mulb (%r8) # sched: [6:1.00]
-; BTVER2-NEXT: mulw %si # sched: [3:1.00]
-; BTVER2-NEXT: mulw (%r9) # sched: [6:1.00]
-; BTVER2-NEXT: mull %edx # sched: [3:1.00]
-; BTVER2-NEXT: mull (%rax) # sched: [6:1.00]
-; BTVER2-NEXT: mulq %rcx # sched: [6:4.00]
-; BTVER2-NEXT: mulq (%r10) # sched: [9:4.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_mul:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [8:0.50]
-; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: mulb %dil # sched: [4:1.00]
-; ZNVER1-NEXT: mulb (%r8) # sched: [8:1.00]
-; ZNVER1-NEXT: mulw %si # sched: [3:1.00]
-; ZNVER1-NEXT: mulw (%r9) # sched: [8:1.00]
-; ZNVER1-NEXT: mull %edx # sched: [3:1.00]
-; ZNVER1-NEXT: mull (%rax) # sched: [8:1.00]
-; ZNVER1-NEXT: mulq %rcx # sched: [4:1.00]
-; ZNVER1-NEXT: mulq (%r10) # sched: [9:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "mulb $0 \0A\09 mulb $4 \0A\09 mulw $1 \0A\09 mulw $5 \0A\09 mull $2 \0A\09 mull $6 \0A\09 mulq $3 \0A\09 mulq $7", "r,r,r,r,*m,*m,*m,*m"(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) nounwind
- ret void
-}
-
-define void @test_neg(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) optsize {
-; GENERIC-LABEL: test_neg:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: negb %dil # sched: [1:0.33]
-; GENERIC-NEXT: negb (%r8) # sched: [7:1.00]
-; GENERIC-NEXT: negw %si # sched: [1:0.33]
-; GENERIC-NEXT: negw (%r9) # sched: [7:1.00]
-; GENERIC-NEXT: negl %edx # sched: [1:0.33]
-; GENERIC-NEXT: negl (%rax) # sched: [7:1.00]
-; GENERIC-NEXT: negq %rcx # sched: [1:0.33]
-; GENERIC-NEXT: negq (%r10) # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_neg:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [1:1.00]
-; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: negb %dil # sched: [1:0.50]
-; ATOM-NEXT: negb (%r8) # sched: [1:1.00]
-; ATOM-NEXT: negw %si # sched: [1:0.50]
-; ATOM-NEXT: negw (%r9) # sched: [1:1.00]
-; ATOM-NEXT: negl %edx # sched: [1:0.50]
-; ATOM-NEXT: negl (%rax) # sched: [1:1.00]
-; ATOM-NEXT: negq %rcx # sched: [1:0.50]
-; ATOM-NEXT: negq (%r10) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_neg:
-; SLM: # %bb.0:
-; SLM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00]
-; SLM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: negb %dil # sched: [1:0.50]
-; SLM-NEXT: negb (%r8) # sched: [5:2.00]
-; SLM-NEXT: negw %si # sched: [1:0.50]
-; SLM-NEXT: negw (%r9) # sched: [5:2.00]
-; SLM-NEXT: negl %edx # sched: [1:0.50]
-; SLM-NEXT: negl (%rax) # sched: [5:2.00]
-; SLM-NEXT: negq %rcx # sched: [1:0.50]
-; SLM-NEXT: negq (%r10) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_neg:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: negb %dil # sched: [1:0.33]
-; SANDY-NEXT: negb (%r8) # sched: [7:1.00]
-; SANDY-NEXT: negw %si # sched: [1:0.33]
-; SANDY-NEXT: negw (%r9) # sched: [7:1.00]
-; SANDY-NEXT: negl %edx # sched: [1:0.33]
-; SANDY-NEXT: negl (%rax) # sched: [7:1.00]
-; SANDY-NEXT: negq %rcx # sched: [1:0.33]
-; SANDY-NEXT: negq (%r10) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_neg:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: negb %dil # sched: [1:0.25]
-; HASWELL-NEXT: negb (%r8) # sched: [7:1.00]
-; HASWELL-NEXT: negw %si # sched: [1:0.25]
-; HASWELL-NEXT: negw (%r9) # sched: [7:1.00]
-; HASWELL-NEXT: negl %edx # sched: [1:0.25]
-; HASWELL-NEXT: negl (%rax) # sched: [7:1.00]
-; HASWELL-NEXT: negq %rcx # sched: [1:0.25]
-; HASWELL-NEXT: negq (%r10) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_neg:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: negb %dil # sched: [1:0.25]
-; BROADWELL-NEXT: negb (%r8) # sched: [7:1.00]
-; BROADWELL-NEXT: negw %si # sched: [1:0.25]
-; BROADWELL-NEXT: negw (%r9) # sched: [7:1.00]
-; BROADWELL-NEXT: negl %edx # sched: [1:0.25]
-; BROADWELL-NEXT: negl (%rax) # sched: [7:1.00]
-; BROADWELL-NEXT: negq %rcx # sched: [1:0.25]
-; BROADWELL-NEXT: negq (%r10) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_neg:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: negb %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: negb (%r8) # sched: [7:1.00]
-; SKYLAKE-NEXT: negw %si # sched: [1:0.25]
-; SKYLAKE-NEXT: negw (%r9) # sched: [7:1.00]
-; SKYLAKE-NEXT: negl %edx # sched: [1:0.25]
-; SKYLAKE-NEXT: negl (%rax) # sched: [7:1.00]
-; SKYLAKE-NEXT: negq %rcx # sched: [1:0.25]
-; SKYLAKE-NEXT: negq (%r10) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_neg:
-; SKX: # %bb.0:
-; SKX-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; SKX-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: negb %dil # sched: [1:0.25]
-; SKX-NEXT: negb (%r8) # sched: [7:1.00]
-; SKX-NEXT: negw %si # sched: [1:0.25]
-; SKX-NEXT: negw (%r9) # sched: [7:1.00]
-; SKX-NEXT: negl %edx # sched: [1:0.25]
-; SKX-NEXT: negl (%rax) # sched: [7:1.00]
-; SKX-NEXT: negq %rcx # sched: [1:0.25]
-; SKX-NEXT: negq (%r10) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_neg:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: negb %dil # sched: [1:0.50]
-; BDVER2-NEXT: negb (%r8) # sched: [6:1.00]
-; BDVER2-NEXT: negw %si # sched: [1:0.50]
-; BDVER2-NEXT: negw (%r9) # sched: [6:1.00]
-; BDVER2-NEXT: negl %edx # sched: [1:0.50]
-; BDVER2-NEXT: negl (%rax) # sched: [6:1.00]
-; BDVER2-NEXT: negq %rcx # sched: [1:0.50]
-; BDVER2-NEXT: negq (%r10) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_neg:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00]
-; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: negb %dil # sched: [1:0.50]
-; BTVER2-NEXT: negb (%r8) # sched: [5:1.00]
-; BTVER2-NEXT: negw %si # sched: [1:0.50]
-; BTVER2-NEXT: negw (%r9) # sched: [5:1.00]
-; BTVER2-NEXT: negl %edx # sched: [1:0.50]
-; BTVER2-NEXT: negl (%rax) # sched: [5:1.00]
-; BTVER2-NEXT: negq %rcx # sched: [1:0.50]
-; BTVER2-NEXT: negq (%r10) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_neg:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [8:0.50]
-; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: negb %dil # sched: [1:0.25]
-; ZNVER1-NEXT: negb (%r8) # sched: [5:0.50]
-; ZNVER1-NEXT: negw %si # sched: [1:0.25]
-; ZNVER1-NEXT: negw (%r9) # sched: [5:0.50]
-; ZNVER1-NEXT: negl %edx # sched: [1:0.25]
-; ZNVER1-NEXT: negl (%rax) # sched: [5:0.50]
-; ZNVER1-NEXT: negq %rcx # sched: [1:0.25]
-; ZNVER1-NEXT: negq (%r10) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "negb $0 \0A\09 negb $4 \0A\09 negw $1 \0A\09 negw $5 \0A\09 negl $2 \0A\09 negl $6 \0A\09 negq $3 \0A\09 negq $7", "r,r,r,r,*m,*m,*m,*m"(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) nounwind
- ret void
-}
-
-define void @test_nop(i16 %a0, i32 %a1, i64 %a2, i16 *%p0, i32 *%p1, i64 *%p2) optsize {
-; GENERIC-LABEL: test_nop:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: nop # sched: [1:0.25]
-; GENERIC-NEXT: nopw %di # sched: [1:0.25]
-; GENERIC-NEXT: nopw (%rcx) # sched: [1:0.25]
-; GENERIC-NEXT: nopl %esi # sched: [1:0.25]
-; GENERIC-NEXT: nopl (%r8) # sched: [1:0.25]
-; GENERIC-NEXT: nopq %rdx # sched: [1:0.25]
-; GENERIC-NEXT: nopq (%r9) # sched: [1:0.25]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_nop:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nopw %di # sched: [1:0.50]
-; ATOM-NEXT: nopw (%rcx) # sched: [1:0.50]
-; ATOM-NEXT: nopl %esi # sched: [1:0.50]
-; ATOM-NEXT: nopl (%r8) # sched: [1:0.50]
-; ATOM-NEXT: nopq %rdx # sched: [1:0.50]
-; ATOM-NEXT: nopq (%r9) # sched: [1:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_nop:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: nop # sched: [1:0.50]
-; SLM-NEXT: nopw %di # sched: [1:0.50]
-; SLM-NEXT: nopw (%rcx) # sched: [1:0.50]
-; SLM-NEXT: nopl %esi # sched: [1:0.50]
-; SLM-NEXT: nopl (%r8) # sched: [1:0.50]
-; SLM-NEXT: nopq %rdx # sched: [1:0.50]
-; SLM-NEXT: nopq (%r9) # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_nop:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: nop # sched: [1:0.25]
-; SANDY-NEXT: nopw %di # sched: [1:0.25]
-; SANDY-NEXT: nopw (%rcx) # sched: [1:0.25]
-; SANDY-NEXT: nopl %esi # sched: [1:0.25]
-; SANDY-NEXT: nopl (%r8) # sched: [1:0.25]
-; SANDY-NEXT: nopq %rdx # sched: [1:0.25]
-; SANDY-NEXT: nopq (%r9) # sched: [1:0.25]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_nop:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: nop # sched: [1:0.25]
-; HASWELL-NEXT: nopw %di # sched: [1:0.25]
-; HASWELL-NEXT: nopw (%rcx) # sched: [1:0.25]
-; HASWELL-NEXT: nopl %esi # sched: [1:0.25]
-; HASWELL-NEXT: nopl (%r8) # sched: [1:0.25]
-; HASWELL-NEXT: nopq %rdx # sched: [1:0.25]
-; HASWELL-NEXT: nopq (%r9) # sched: [1:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_nop:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: nop # sched: [1:0.25]
-; BROADWELL-NEXT: nopw %di # sched: [1:0.25]
-; BROADWELL-NEXT: nopw (%rcx) # sched: [1:0.25]
-; BROADWELL-NEXT: nopl %esi # sched: [1:0.25]
-; BROADWELL-NEXT: nopl (%r8) # sched: [1:0.25]
-; BROADWELL-NEXT: nopq %rdx # sched: [1:0.25]
-; BROADWELL-NEXT: nopq (%r9) # sched: [1:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_nop:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: nop # sched: [1:0.17]
-; SKYLAKE-NEXT: nopw %di # sched: [1:0.17]
-; SKYLAKE-NEXT: nopw (%rcx) # sched: [1:0.17]
-; SKYLAKE-NEXT: nopl %esi # sched: [1:0.17]
-; SKYLAKE-NEXT: nopl (%r8) # sched: [1:0.17]
-; SKYLAKE-NEXT: nopq %rdx # sched: [1:0.17]
-; SKYLAKE-NEXT: nopq (%r9) # sched: [1:0.17]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_nop:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: nop # sched: [1:0.17]
-; SKX-NEXT: nopw %di # sched: [1:0.17]
-; SKX-NEXT: nopw (%rcx) # sched: [1:0.17]
-; SKX-NEXT: nopl %esi # sched: [1:0.17]
-; SKX-NEXT: nopl (%r8) # sched: [1:0.17]
-; SKX-NEXT: nopq %rdx # sched: [1:0.17]
-; SKX-NEXT: nopq (%r9) # sched: [1:0.17]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_nop:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: nop # sched: [1:0.50]
-; BDVER2-NEXT: nopw %di # sched: [1:0.50]
-; BDVER2-NEXT: nopw (%rcx) # sched: [1:0.50]
-; BDVER2-NEXT: nopl %esi # sched: [1:0.50]
-; BDVER2-NEXT: nopl (%r8) # sched: [1:0.50]
-; BDVER2-NEXT: nopq %rdx # sched: [1:0.50]
-; BDVER2-NEXT: nopq (%r9) # sched: [1:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_nop:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: nop # sched: [1:0.50]
-; BTVER2-NEXT: nopw %di # sched: [1:0.50]
-; BTVER2-NEXT: nopw (%rcx) # sched: [1:0.50]
-; BTVER2-NEXT: nopl %esi # sched: [1:0.50]
-; BTVER2-NEXT: nopl (%r8) # sched: [1:0.50]
-; BTVER2-NEXT: nopq %rdx # sched: [1:0.50]
-; BTVER2-NEXT: nopq (%r9) # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_nop:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: nop # sched: [1:0.25]
-; ZNVER1-NEXT: nopw %di # sched: [1:0.25]
-; ZNVER1-NEXT: nopw (%rcx) # sched: [1:0.25]
-; ZNVER1-NEXT: nopl %esi # sched: [1:0.25]
-; ZNVER1-NEXT: nopl (%r8) # sched: [1:0.25]
-; ZNVER1-NEXT: nopq %rdx # sched: [1:0.25]
-; ZNVER1-NEXT: nopq (%r9) # sched: [1:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "nop \0A\09 nopw $0 \0A\09 nopw $3 \0A\09 nopl $1 \0A\09 nopl $4 \0A\09 nopq $2 \0A\09 nopq $5", "r,r,r,*m,*m,*m"(i16 %a0, i32 %a1, i64 %a2, i16 *%p0, i32 *%p1, i64 *%p2) nounwind
- ret void
-}
-
-define void @test_not(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) optsize {
-; GENERIC-LABEL: test_not:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; GENERIC-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: notb %dil # sched: [1:0.33]
-; GENERIC-NEXT: notb (%r8) # sched: [7:1.00]
-; GENERIC-NEXT: notw %si # sched: [1:0.33]
-; GENERIC-NEXT: notw (%r9) # sched: [7:1.00]
-; GENERIC-NEXT: notl %edx # sched: [1:0.33]
-; GENERIC-NEXT: notl (%rax) # sched: [7:1.00]
-; GENERIC-NEXT: notq %rcx # sched: [1:0.33]
-; GENERIC-NEXT: notq (%r10) # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_not:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [1:1.00]
-; ATOM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: notb %dil # sched: [1:0.50]
-; ATOM-NEXT: notb (%r8) # sched: [1:1.00]
-; ATOM-NEXT: notw %si # sched: [1:0.50]
-; ATOM-NEXT: notw (%r9) # sched: [1:1.00]
-; ATOM-NEXT: notl %edx # sched: [1:0.50]
-; ATOM-NEXT: notl (%rax) # sched: [1:1.00]
-; ATOM-NEXT: notq %rcx # sched: [1:0.50]
-; ATOM-NEXT: notq (%r10) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_not:
-; SLM: # %bb.0:
-; SLM-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00]
-; SLM-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: notb %dil # sched: [1:0.50]
-; SLM-NEXT: notb (%r8) # sched: [5:2.00]
-; SLM-NEXT: notw %si # sched: [1:0.50]
-; SLM-NEXT: notw (%r9) # sched: [5:2.00]
-; SLM-NEXT: notl %edx # sched: [1:0.50]
-; SLM-NEXT: notl (%rax) # sched: [5:2.00]
-; SLM-NEXT: notq %rcx # sched: [1:0.50]
-; SLM-NEXT: notq (%r10) # sched: [5:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_not:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; SANDY-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: notb %dil # sched: [1:0.33]
-; SANDY-NEXT: notb (%r8) # sched: [7:1.00]
-; SANDY-NEXT: notw %si # sched: [1:0.33]
-; SANDY-NEXT: notw (%r9) # sched: [7:1.00]
-; SANDY-NEXT: notl %edx # sched: [1:0.33]
-; SANDY-NEXT: notl (%rax) # sched: [7:1.00]
-; SANDY-NEXT: notq %rcx # sched: [1:0.33]
-; SANDY-NEXT: notq (%r10) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_not:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; HASWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: notb %dil # sched: [1:0.25]
-; HASWELL-NEXT: notb (%r8) # sched: [7:1.00]
-; HASWELL-NEXT: notw %si # sched: [1:0.25]
-; HASWELL-NEXT: notw (%r9) # sched: [7:1.00]
-; HASWELL-NEXT: notl %edx # sched: [1:0.25]
-; HASWELL-NEXT: notl (%rax) # sched: [7:1.00]
-; HASWELL-NEXT: notq %rcx # sched: [1:0.25]
-; HASWELL-NEXT: notq (%r10) # sched: [7:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_not:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; BROADWELL-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: notb %dil # sched: [1:0.25]
-; BROADWELL-NEXT: notb (%r8) # sched: [7:1.00]
-; BROADWELL-NEXT: notw %si # sched: [1:0.25]
-; BROADWELL-NEXT: notw (%r9) # sched: [7:1.00]
-; BROADWELL-NEXT: notl %edx # sched: [1:0.25]
-; BROADWELL-NEXT: notl (%rax) # sched: [7:1.00]
-; BROADWELL-NEXT: notq %rcx # sched: [1:0.25]
-; BROADWELL-NEXT: notq (%r10) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_not:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; SKYLAKE-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: notb %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: notb (%r8) # sched: [7:1.00]
-; SKYLAKE-NEXT: notw %si # sched: [1:0.25]
-; SKYLAKE-NEXT: notw (%r9) # sched: [7:1.00]
-; SKYLAKE-NEXT: notl %edx # sched: [1:0.25]
-; SKYLAKE-NEXT: notl (%rax) # sched: [7:1.00]
-; SKYLAKE-NEXT: notq %rcx # sched: [1:0.25]
-; SKYLAKE-NEXT: notq (%r10) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_not:
-; SKX: # %bb.0:
-; SKX-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; SKX-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: notb %dil # sched: [1:0.25]
-; SKX-NEXT: notb (%r8) # sched: [7:1.00]
-; SKX-NEXT: notw %si # sched: [1:0.25]
-; SKX-NEXT: notw (%r9) # sched: [7:1.00]
-; SKX-NEXT: notl %edx # sched: [1:0.25]
-; SKX-NEXT: notl (%rax) # sched: [7:1.00]
-; SKX-NEXT: notq %rcx # sched: [1:0.25]
-; SKX-NEXT: notq (%r10) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_not:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [5:0.50]
-; BDVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: notb %dil # sched: [1:0.50]
-; BDVER2-NEXT: notb (%r8) # sched: [6:1.00]
-; BDVER2-NEXT: notw %si # sched: [1:0.50]
-; BDVER2-NEXT: notw (%r9) # sched: [6:1.00]
-; BDVER2-NEXT: notl %edx # sched: [1:0.50]
-; BDVER2-NEXT: notl (%rax) # sched: [6:1.00]
-; BDVER2-NEXT: notq %rcx # sched: [1:0.50]
-; BDVER2-NEXT: notq (%r10) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_not:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [3:1.00]
-; BTVER2-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: notb %dil # sched: [1:0.50]
-; BTVER2-NEXT: notb (%r8) # sched: [5:1.00]
-; BTVER2-NEXT: notw %si # sched: [1:0.50]
-; BTVER2-NEXT: notw (%r9) # sched: [5:1.00]
-; BTVER2-NEXT: notl %edx # sched: [1:0.50]
-; BTVER2-NEXT: notl (%rax) # sched: [5:1.00]
-; BTVER2-NEXT: notq %rcx # sched: [1:0.50]
-; BTVER2-NEXT: notq (%r10) # sched: [5:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_not:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %r10 # sched: [8:0.50]
-; ZNVER1-NEXT: movq {{[0-9]+}}(%rsp), %rax # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: notb %dil # sched: [1:0.25]
-; ZNVER1-NEXT: notb (%r8) # sched: [5:0.50]
-; ZNVER1-NEXT: notw %si # sched: [1:0.25]
-; ZNVER1-NEXT: notw (%r9) # sched: [5:0.50]
-; ZNVER1-NEXT: notl %edx # sched: [1:0.25]
-; ZNVER1-NEXT: notl (%rax) # sched: [5:0.50]
-; ZNVER1-NEXT: notq %rcx # sched: [1:0.25]
-; ZNVER1-NEXT: notq (%r10) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "notb $0 \0A\09 notb $4 \0A\09 notw $1 \0A\09 notw $5 \0A\09 notl $2 \0A\09 notl $6 \0A\09 notq $3 \0A\09 notq $7", "r,r,r,r,*m,*m,*m,*m"(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 *%p2, i64 *%p3) nounwind
- ret void
-}
-
-define void @test_or_8(i8 %a0, i8* %a1, i8 %a2) optsize {
-; GENERIC-LABEL: test_or_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: orb $7, %al # sched: [1:0.33]
-; GENERIC-NEXT: orb $7, %dil # sched: [1:0.33]
-; GENERIC-NEXT: orb $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: orb %dl, %dil # sched: [1:0.33]
-; GENERIC-NEXT: orb %dil, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: orb (%rsi), %dil # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_or_8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: orb $7, %al # sched: [1:0.50]
-; ATOM-NEXT: orb $7, %dil # sched: [1:0.50]
-; ATOM-NEXT: orb $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: orb %dl, %dil # sched: [1:0.50]
-; ATOM-NEXT: orb %dil, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: orb (%rsi), %dil # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_or_8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: orb $7, %al # sched: [1:0.50]
-; SLM-NEXT: orb $7, %dil # sched: [1:0.50]
-; SLM-NEXT: orb $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: orb %dl, %dil # sched: [1:0.50]
-; SLM-NEXT: orb %dil, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: orb (%rsi), %dil # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_or_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: orb $7, %al # sched: [1:0.33]
-; SANDY-NEXT: orb $7, %dil # sched: [1:0.33]
-; SANDY-NEXT: orb $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: orb %dl, %dil # sched: [1:0.33]
-; SANDY-NEXT: orb %dil, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: orb (%rsi), %dil # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_or_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: orb $7, %al # sched: [1:0.25]
-; HASWELL-NEXT: orb $7, %dil # sched: [1:0.25]
-; HASWELL-NEXT: orb $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: orb %dl, %dil # sched: [1:0.25]
-; HASWELL-NEXT: orb %dil, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: orb (%rsi), %dil # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_or_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: orb $7, %al # sched: [1:0.25]
-; BROADWELL-NEXT: orb $7, %dil # sched: [1:0.25]
-; BROADWELL-NEXT: orb $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: orb %dl, %dil # sched: [1:0.25]
-; BROADWELL-NEXT: orb %dil, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: orb (%rsi), %dil # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_or_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: orb $7, %al # sched: [1:0.25]
-; SKYLAKE-NEXT: orb $7, %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: orb $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: orb %dl, %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: orb %dil, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: orb (%rsi), %dil # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_or_8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: orb $7, %al # sched: [1:0.25]
-; SKX-NEXT: orb $7, %dil # sched: [1:0.25]
-; SKX-NEXT: orb $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: orb %dl, %dil # sched: [1:0.25]
-; SKX-NEXT: orb %dil, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: orb (%rsi), %dil # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_or_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: orb $7, %al # sched: [1:0.50]
-; BDVER2-NEXT: orb $7, %dil # sched: [1:0.50]
-; BDVER2-NEXT: orb $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: orb %dl, %dil # sched: [1:0.50]
-; BDVER2-NEXT: orb %dil, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: orb (%rsi), %dil # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_or_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: orb $7, %al # sched: [1:0.50]
-; BTVER2-NEXT: orb $7, %dil # sched: [1:0.50]
-; BTVER2-NEXT: orb $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: orb %dl, %dil # sched: [1:0.50]
-; BTVER2-NEXT: orb %dil, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: orb (%rsi), %dil # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_or_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: orb $7, %al # sched: [1:0.25]
-; ZNVER1-NEXT: orb $7, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: orb $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: orb %dl, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: orb %dil, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: orb (%rsi), %dil # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "orb $3, %AL \0A\09 orb $3, $0 \0A\09 orb $3, $2 \0A\09 orb $1, $0 \0A\09 orb $0, $2 \0A\09 orb $2, $0", "r,r,*m,i"(i8 %a0, i8 %a2, i8* %a1, i8 7) nounwind
- ret void
-}
-define void @test_or_16(i16 %a0, i16* %a1, i16 %a2) optsize {
-; GENERIC-LABEL: test_or_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: orw $511, %ax # imm = 0x1FF
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: orw $511, %di # imm = 0x1FF
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: orw $511, (%rsi) # imm = 0x1FF
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: orw $7, %di # sched: [1:0.33]
-; GENERIC-NEXT: orw $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: orw %dx, %di # sched: [1:0.33]
-; GENERIC-NEXT: orw %di, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: orw (%rsi), %di # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_or_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: orw $511, %ax # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: orw $511, %di # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: orw $511, (%rsi) # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: orw $7, %di # sched: [1:0.50]
-; ATOM-NEXT: orw $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: orw %dx, %di # sched: [1:0.50]
-; ATOM-NEXT: orw %di, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: orw (%rsi), %di # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_or_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: orw $511, %ax # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: orw $511, %di # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: orw $511, (%rsi) # imm = 0x1FF
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: orw $7, %di # sched: [1:0.50]
-; SLM-NEXT: orw $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: orw %dx, %di # sched: [1:0.50]
-; SLM-NEXT: orw %di, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: orw (%rsi), %di # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_or_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: orw $511, %ax # imm = 0x1FF
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: orw $511, %di # imm = 0x1FF
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: orw $511, (%rsi) # imm = 0x1FF
-; SANDY-NEXT: # sched: [7:1.00]
-; SANDY-NEXT: orw $7, %di # sched: [1:0.33]
-; SANDY-NEXT: orw $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: orw %dx, %di # sched: [1:0.33]
-; SANDY-NEXT: orw %di, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: orw (%rsi), %di # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_or_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: orw $511, %ax # imm = 0x1FF
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: orw $511, %di # imm = 0x1FF
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: orw $511, (%rsi) # imm = 0x1FF
-; HASWELL-NEXT: # sched: [7:1.00]
-; HASWELL-NEXT: orw $7, %di # sched: [1:0.25]
-; HASWELL-NEXT: orw $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: orw %dx, %di # sched: [1:0.25]
-; HASWELL-NEXT: orw %di, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: orw (%rsi), %di # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_or_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: orw $511, %ax # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: orw $511, %di # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: orw $511, (%rsi) # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [7:1.00]
-; BROADWELL-NEXT: orw $7, %di # sched: [1:0.25]
-; BROADWELL-NEXT: orw $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: orw %dx, %di # sched: [1:0.25]
-; BROADWELL-NEXT: orw %di, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: orw (%rsi), %di # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_or_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: orw $511, %ax # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: orw $511, %di # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: orw $511, (%rsi) # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [7:1.00]
-; SKYLAKE-NEXT: orw $7, %di # sched: [1:0.25]
-; SKYLAKE-NEXT: orw $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: orw %dx, %di # sched: [1:0.25]
-; SKYLAKE-NEXT: orw %di, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: orw (%rsi), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_or_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: orw $511, %ax # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: orw $511, %di # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: orw $511, (%rsi) # imm = 0x1FF
-; SKX-NEXT: # sched: [7:1.00]
-; SKX-NEXT: orw $7, %di # sched: [1:0.25]
-; SKX-NEXT: orw $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: orw %dx, %di # sched: [1:0.25]
-; SKX-NEXT: orw %di, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: orw (%rsi), %di # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_or_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: orw $511, %ax # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: orw $511, %di # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: orw $511, (%rsi) # imm = 0x1FF
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: orw $7, %di # sched: [1:0.50]
-; BDVER2-NEXT: orw $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: orw %dx, %di # sched: [1:0.50]
-; BDVER2-NEXT: orw %di, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: orw (%rsi), %di # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_or_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: orw $511, %ax # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: orw $511, %di # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: orw $511, (%rsi) # imm = 0x1FF
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: orw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: orw $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: orw %dx, %di # sched: [1:0.50]
-; BTVER2-NEXT: orw %di, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: orw (%rsi), %di # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_or_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: orw $511, %ax # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: orw $511, %di # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: orw $511, (%rsi) # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: orw $7, %di # sched: [1:0.25]
-; ZNVER1-NEXT: orw $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: orw %dx, %di # sched: [1:0.25]
-; ZNVER1-NEXT: orw %di, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: orw (%rsi), %di # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "orw $3, %AX \0A\09 orw $3, $0 \0A\09 orw $3, $2 \0A\09 orw $4, $0 \0A\09 orw $4, $2 \0A\09 orw $1, $0 \0A\09 orw $0, $2 \0A\09 orw $2, $0", "r,r,*m,i,i"(i16 %a0, i16 %a2, i16* %a1, i16 511, i8 7) nounwind
- ret void
-}
-define void @test_or_32(i32 %a0, i32* %a1, i32 %a2) optsize {
-; GENERIC-LABEL: test_or_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: orl $665536, %eax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: orl $665536, %edi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: orl $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: orl $7, %edi # sched: [1:0.33]
-; GENERIC-NEXT: orl $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: orl %edx, %edi # sched: [1:0.33]
-; GENERIC-NEXT: orl %edi, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: orl (%rsi), %edi # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_or_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: orl $665536, %eax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: orl $665536, %edi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: orl $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: orl $7, %edi # sched: [1:0.50]
-; ATOM-NEXT: orl $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: orl %edx, %edi # sched: [1:0.50]
-; ATOM-NEXT: orl %edi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: orl (%rsi), %edi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_or_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: orl $665536, %eax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: orl $665536, %edi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: orl $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: orl $7, %edi # sched: [1:0.50]
-; SLM-NEXT: orl $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: orl %edx, %edi # sched: [1:0.50]
-; SLM-NEXT: orl %edi, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: orl (%rsi), %edi # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_or_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: orl $665536, %eax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: orl $665536, %edi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: orl $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [7:1.00]
-; SANDY-NEXT: orl $7, %edi # sched: [1:0.33]
-; SANDY-NEXT: orl $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: orl %edx, %edi # sched: [1:0.33]
-; SANDY-NEXT: orl %edi, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: orl (%rsi), %edi # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_or_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: orl $665536, %eax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: orl $665536, %edi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: orl $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [7:1.00]
-; HASWELL-NEXT: orl $7, %edi # sched: [1:0.25]
-; HASWELL-NEXT: orl $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: orl %edx, %edi # sched: [1:0.25]
-; HASWELL-NEXT: orl %edi, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: orl (%rsi), %edi # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_or_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: orl $665536, %eax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: orl $665536, %edi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: orl $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [7:1.00]
-; BROADWELL-NEXT: orl $7, %edi # sched: [1:0.25]
-; BROADWELL-NEXT: orl $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: orl %edx, %edi # sched: [1:0.25]
-; BROADWELL-NEXT: orl %edi, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: orl (%rsi), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_or_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: orl $665536, %eax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: orl $665536, %edi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: orl $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [7:1.00]
-; SKYLAKE-NEXT: orl $7, %edi # sched: [1:0.25]
-; SKYLAKE-NEXT: orl $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: orl %edx, %edi # sched: [1:0.25]
-; SKYLAKE-NEXT: orl %edi, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: orl (%rsi), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_or_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: orl $665536, %eax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: orl $665536, %edi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: orl $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [7:1.00]
-; SKX-NEXT: orl $7, %edi # sched: [1:0.25]
-; SKX-NEXT: orl $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: orl %edx, %edi # sched: [1:0.25]
-; SKX-NEXT: orl %edi, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: orl (%rsi), %edi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_or_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: orl $665536, %eax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: orl $665536, %edi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: orl $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: orl $7, %edi # sched: [1:0.50]
-; BDVER2-NEXT: orl $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: orl %edx, %edi # sched: [1:0.50]
-; BDVER2-NEXT: orl %edi, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: orl (%rsi), %edi # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_or_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: orl $665536, %eax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: orl $665536, %edi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: orl $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: orl $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: orl $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: orl %edx, %edi # sched: [1:0.50]
-; BTVER2-NEXT: orl %edi, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: orl (%rsi), %edi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_or_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: orl $665536, %eax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: orl $665536, %edi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: orl $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: orl $7, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: orl $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: orl %edx, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: orl %edi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: orl (%rsi), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "orl $3, %EAX \0A\09 orl $3, $0 \0A\09 orl $3, $2 \0A\09 orl $4, $0 \0A\09 orl $4, $2 \0A\09 orl $1, $0 \0A\09 orl $0, $2 \0A\09 orl $2, $0", "r,r,*m,i,i"(i32 %a0, i32 %a2, i32* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-define void @test_or_64(i64 %a0, i64* %a1, i64 %a2) optsize {
-; GENERIC-LABEL: test_or_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: orq $665536, %rax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: orq $665536, %rdi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: orq $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: orq $7, %rdi # sched: [1:0.33]
-; GENERIC-NEXT: orq $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: orq %rdx, %rdi # sched: [1:0.33]
-; GENERIC-NEXT: orq %rdi, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: orq (%rsi), %rdi # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_or_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: orq $665536, %rax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: orq $665536, %rdi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: orq $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: orq $7, %rdi # sched: [1:0.50]
-; ATOM-NEXT: orq $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: orq %rdx, %rdi # sched: [1:0.50]
-; ATOM-NEXT: orq %rdi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: orq (%rsi), %rdi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_or_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: orq $665536, %rax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: orq $665536, %rdi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: orq $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: orq $7, %rdi # sched: [1:0.50]
-; SLM-NEXT: orq $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: orq %rdx, %rdi # sched: [1:0.50]
-; SLM-NEXT: orq %rdi, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: orq (%rsi), %rdi # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_or_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: orq $665536, %rax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: orq $665536, %rdi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: orq $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [7:1.00]
-; SANDY-NEXT: orq $7, %rdi # sched: [1:0.33]
-; SANDY-NEXT: orq $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: orq %rdx, %rdi # sched: [1:0.33]
-; SANDY-NEXT: orq %rdi, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: orq (%rsi), %rdi # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_or_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: orq $665536, %rax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: orq $665536, %rdi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: orq $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [7:1.00]
-; HASWELL-NEXT: orq $7, %rdi # sched: [1:0.25]
-; HASWELL-NEXT: orq $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: orq %rdx, %rdi # sched: [1:0.25]
-; HASWELL-NEXT: orq %rdi, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: orq (%rsi), %rdi # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_or_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: orq $665536, %rax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: orq $665536, %rdi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: orq $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [7:1.00]
-; BROADWELL-NEXT: orq $7, %rdi # sched: [1:0.25]
-; BROADWELL-NEXT: orq $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: orq %rdx, %rdi # sched: [1:0.25]
-; BROADWELL-NEXT: orq %rdi, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: orq (%rsi), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_or_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: orq $665536, %rax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: orq $665536, %rdi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: orq $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [7:1.00]
-; SKYLAKE-NEXT: orq $7, %rdi # sched: [1:0.25]
-; SKYLAKE-NEXT: orq $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: orq %rdx, %rdi # sched: [1:0.25]
-; SKYLAKE-NEXT: orq %rdi, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: orq (%rsi), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_or_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: orq $665536, %rax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: orq $665536, %rdi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: orq $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [7:1.00]
-; SKX-NEXT: orq $7, %rdi # sched: [1:0.25]
-; SKX-NEXT: orq $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: orq %rdx, %rdi # sched: [1:0.25]
-; SKX-NEXT: orq %rdi, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: orq (%rsi), %rdi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_or_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: orq $665536, %rax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: orq $665536, %rdi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: orq $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: orq $7, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: orq $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: orq %rdx, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: orq %rdi, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: orq (%rsi), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_or_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: orq $665536, %rax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: orq $665536, %rdi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: orq $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: orq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: orq $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: orq %rdx, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: orq %rdi, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: orq (%rsi), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_or_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: orq $665536, %rax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: orq $665536, %rdi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: orq $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: orq $7, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: orq $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: orq %rdx, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: orq %rdi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: orq (%rsi), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "orq $3, %RAX \0A\09 orq $3, $0 \0A\09 orq $3, $2 \0A\09 orq $4, $0 \0A\09 orq $4, $2 \0A\09 orq $1, $0 \0A\09 orq $0, $2 \0A\09 orq $2, $0", "r,r,*m,i,i"(i64 %a0, i64 %a2, i64* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-
-define void @test_out() optsize {
-; GENERIC-LABEL: test_out:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: outb %al, $7 # sched: [100:0.33]
-; GENERIC-NEXT: outw %ax, $7 # sched: [100:0.33]
-; GENERIC-NEXT: outl %eax, $7 # sched: [100:0.33]
-; GENERIC-NEXT: outb %al, %dx # sched: [100:0.33]
-; GENERIC-NEXT: outw %ax, %dx # sched: [100:0.33]
-; GENERIC-NEXT: outl %eax, %dx # sched: [100:0.33]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_out:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: outb %al, $7 # sched: [72:36.00]
-; ATOM-NEXT: outw %ax, $7 # sched: [72:36.00]
-; ATOM-NEXT: outl %eax, $7 # sched: [72:36.00]
-; ATOM-NEXT: outb %al, %dx # sched: [68:34.00]
-; ATOM-NEXT: outw %ax, %dx # sched: [68:34.00]
-; ATOM-NEXT: outl %eax, %dx # sched: [68:34.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_out:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: outb %al, $7 # sched: [100:1.00]
-; SLM-NEXT: outw %ax, $7 # sched: [100:1.00]
-; SLM-NEXT: outl %eax, $7 # sched: [100:1.00]
-; SLM-NEXT: outb %al, %dx # sched: [100:1.00]
-; SLM-NEXT: outw %ax, %dx # sched: [100:1.00]
-; SLM-NEXT: outl %eax, %dx # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_out:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: outb %al, $7 # sched: [100:0.33]
-; SANDY-NEXT: outw %ax, $7 # sched: [100:0.33]
-; SANDY-NEXT: outl %eax, $7 # sched: [100:0.33]
-; SANDY-NEXT: outb %al, %dx # sched: [100:0.33]
-; SANDY-NEXT: outw %ax, %dx # sched: [100:0.33]
-; SANDY-NEXT: outl %eax, %dx # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_out:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: outb %al, $7 # sched: [36:5.00]
-; HASWELL-NEXT: outw %ax, $7 # sched: [36:5.00]
-; HASWELL-NEXT: outl %eax, $7 # sched: [36:5.00]
-; HASWELL-NEXT: outb %al, %dx # sched: [36:5.00]
-; HASWELL-NEXT: outw %ax, %dx # sched: [36:5.00]
-; HASWELL-NEXT: outl %eax, %dx # sched: [36:5.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_out:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: outb %al, $7 # sched: [35:5.00]
-; BROADWELL-NEXT: outw %ax, $7 # sched: [35:5.00]
-; BROADWELL-NEXT: outl %eax, $7 # sched: [35:5.00]
-; BROADWELL-NEXT: outb %al, %dx # sched: [35:5.00]
-; BROADWELL-NEXT: outw %ax, %dx # sched: [35:5.00]
-; BROADWELL-NEXT: outl %eax, %dx # sched: [35:5.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_out:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: outb %al, $7 # sched: [35:5.00]
-; SKYLAKE-NEXT: outw %ax, $7 # sched: [35:5.00]
-; SKYLAKE-NEXT: outl %eax, $7 # sched: [35:5.00]
-; SKYLAKE-NEXT: outb %al, %dx # sched: [35:5.00]
-; SKYLAKE-NEXT: outw %ax, %dx # sched: [35:5.00]
-; SKYLAKE-NEXT: outl %eax, %dx # sched: [35:5.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_out:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: outb %al, $7 # sched: [35:5.00]
-; SKX-NEXT: outw %ax, $7 # sched: [35:5.00]
-; SKX-NEXT: outl %eax, $7 # sched: [35:5.00]
-; SKX-NEXT: outb %al, %dx # sched: [35:5.00]
-; SKX-NEXT: outw %ax, %dx # sched: [35:5.00]
-; SKX-NEXT: outl %eax, %dx # sched: [35:5.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_out:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: outb %al, $7 # sched: [100:0.50]
-; BDVER2-NEXT: outw %ax, $7 # sched: [100:0.50]
-; BDVER2-NEXT: outl %eax, $7 # sched: [100:0.50]
-; BDVER2-NEXT: outb %al, %dx # sched: [100:0.50]
-; BDVER2-NEXT: outw %ax, %dx # sched: [100:0.50]
-; BDVER2-NEXT: outl %eax, %dx # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_out:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: outb %al, $7 # sched: [100:0.50]
-; BTVER2-NEXT: outw %ax, $7 # sched: [100:0.50]
-; BTVER2-NEXT: outl %eax, $7 # sched: [100:0.50]
-; BTVER2-NEXT: outb %al, %dx # sched: [100:0.50]
-; BTVER2-NEXT: outw %ax, %dx # sched: [100:0.50]
-; BTVER2-NEXT: outl %eax, %dx # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_out:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: outb %al, $7 # sched: [100:0.25]
-; ZNVER1-NEXT: outw %ax, $7 # sched: [100:0.25]
-; ZNVER1-NEXT: outl %eax, $7 # sched: [100:0.25]
-; ZNVER1-NEXT: outb %al, %dx # sched: [100:0.25]
-; ZNVER1-NEXT: outw %ax, %dx # sched: [100:0.25]
-; ZNVER1-NEXT: outl %eax, %dx # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "outb %AL, $0 \0A\09 outw %AX, $0 \0A\09 outl %EAX, $0 \0A\09 outb %AL, %DX \0A\09 outw %AX, %DX \0A\09 outl %EAX, %DX", "i"(i8 7) nounwind
- ret void
-}
-
-define void @test_outs() optsize {
-; GENERIC-LABEL: test_outs:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: outsb (%rsi), %dx # sched: [100:0.33]
-; GENERIC-NEXT: outsw (%rsi), %dx # sched: [100:0.33]
-; GENERIC-NEXT: outsl (%rsi), %dx # sched: [100:0.33]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_outs:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: outsb (%rsi), %dx # sched: [74:37.00]
-; ATOM-NEXT: outsw (%rsi), %dx # sched: [74:37.00]
-; ATOM-NEXT: outsl (%rsi), %dx # sched: [74:37.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_outs:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: outsb (%rsi), %dx # sched: [100:1.00]
-; SLM-NEXT: outsw (%rsi), %dx # sched: [100:1.00]
-; SLM-NEXT: outsl (%rsi), %dx # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_outs:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: outsb (%rsi), %dx # sched: [100:0.33]
-; SANDY-NEXT: outsw (%rsi), %dx # sched: [100:0.33]
-; SANDY-NEXT: outsl (%rsi), %dx # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_outs:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: outsb (%rsi), %dx # sched: [100:0.25]
-; HASWELL-NEXT: outsw (%rsi), %dx # sched: [100:0.25]
-; HASWELL-NEXT: outsl (%rsi), %dx # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_outs:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: outsb (%rsi), %dx # sched: [100:0.25]
-; BROADWELL-NEXT: outsw (%rsi), %dx # sched: [100:0.25]
-; BROADWELL-NEXT: outsl (%rsi), %dx # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_outs:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: outsb (%rsi), %dx # sched: [100:0.25]
-; SKYLAKE-NEXT: outsw (%rsi), %dx # sched: [100:0.25]
-; SKYLAKE-NEXT: outsl (%rsi), %dx # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_outs:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: outsb (%rsi), %dx # sched: [100:0.25]
-; SKX-NEXT: outsw (%rsi), %dx # sched: [100:0.25]
-; SKX-NEXT: outsl (%rsi), %dx # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_outs:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: outsb (%rsi), %dx # sched: [100:0.50]
-; BDVER2-NEXT: outsw (%rsi), %dx # sched: [100:0.50]
-; BDVER2-NEXT: outsl (%rsi), %dx # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_outs:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: outsb (%rsi), %dx # sched: [100:0.50]
-; BTVER2-NEXT: outsw (%rsi), %dx # sched: [100:0.50]
-; BTVER2-NEXT: outsl (%rsi), %dx # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_outs:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: outsb (%rsi), %dx # sched: [100:0.25]
-; ZNVER1-NEXT: outsw (%rsi), %dx # sched: [100:0.25]
-; ZNVER1-NEXT: outsl (%rsi), %dx # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "outsb \0A\09 outsw \0A\09 outsl", ""()
- ret void
-}
-
-define void @test_pause() optsize {
-; GENERIC-LABEL: test_pause:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: pause # sched: [4:1.33]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pause:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: pause # sched: [17:8.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pause:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: pause # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pause:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: pause # sched: [4:1.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pause:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: pause # sched: [5:1.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pause:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: pause # sched: [5:1.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pause:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: pause # sched: [4:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pause:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: pause # sched: [140:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pause:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: pause # sched: [1:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pause:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: pause # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pause:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: pause # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "pause", ""()
- ret void
-}
-
-define void @test_pop_push() optsize {
-; GENERIC-LABEL: test_pop_push:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: popq %fs # sched: [100:0.33]
-; GENERIC-NEXT: popq %gs # sched: [100:0.33]
-; GENERIC-NEXT: pushq %fs # sched: [3:1.00]
-; GENERIC-NEXT: pushq %gs # sched: [5:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pop_push:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: popq %fs # sched: [29:14.50]
-; ATOM-NEXT: popq %gs # sched: [29:14.50]
-; ATOM-NEXT: pushq %fs # sched: [2:1.00]
-; ATOM-NEXT: pushq %gs # sched: [2:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pop_push:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: popq %fs # sched: [100:1.00]
-; SLM-NEXT: popq %gs # sched: [100:1.00]
-; SLM-NEXT: pushq %fs # sched: [100:1.00]
-; SLM-NEXT: pushq %gs # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pop_push:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: popq %fs # sched: [100:0.33]
-; SANDY-NEXT: popq %gs # sched: [100:0.33]
-; SANDY-NEXT: pushq %fs # sched: [3:1.00]
-; SANDY-NEXT: pushq %gs # sched: [5:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pop_push:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: popq %fs # sched: [100:0.25]
-; HASWELL-NEXT: popq %gs # sched: [100:0.25]
-; HASWELL-NEXT: pushq %fs # sched: [100:0.25]
-; HASWELL-NEXT: pushq %gs # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pop_push:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: popq %fs # sched: [100:0.25]
-; BROADWELL-NEXT: popq %gs # sched: [100:0.25]
-; BROADWELL-NEXT: pushq %fs # sched: [100:0.25]
-; BROADWELL-NEXT: pushq %gs # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pop_push:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: popq %fs # sched: [100:0.25]
-; SKYLAKE-NEXT: popq %gs # sched: [100:0.25]
-; SKYLAKE-NEXT: pushq %fs # sched: [100:0.25]
-; SKYLAKE-NEXT: pushq %gs # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pop_push:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: popq %fs # sched: [100:0.25]
-; SKX-NEXT: popq %gs # sched: [100:0.25]
-; SKX-NEXT: pushq %fs # sched: [100:0.25]
-; SKX-NEXT: pushq %gs # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pop_push:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: popq %fs # sched: [100:0.50]
-; BDVER2-NEXT: popq %gs # sched: [100:0.50]
-; BDVER2-NEXT: pushq %fs # sched: [100:0.50]
-; BDVER2-NEXT: pushq %gs # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pop_push:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: popq %fs # sched: [100:0.50]
-; BTVER2-NEXT: popq %gs # sched: [100:0.50]
-; BTVER2-NEXT: pushq %fs # sched: [100:0.50]
-; BTVER2-NEXT: pushq %gs # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pop_push:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: popq %fs # sched: [100:0.25]
-; ZNVER1-NEXT: popq %gs # sched: [100:0.25]
-; ZNVER1-NEXT: pushq %fs # sched: [100:0.25]
-; ZNVER1-NEXT: pushq %gs # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "pop %FS \0A\09 pop %GS \0A\09 push %FS \0A\09 push %GS", ""()
- ret void
-}
-define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize {
-; GENERIC-LABEL: test_pop_push_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: popw %ax # sched: [6:0.50]
-; GENERIC-NEXT: popw (%rsi) # sched: [6:0.50]
-; GENERIC-NEXT: pushw %di # sched: [5:1.00]
-; GENERIC-NEXT: pushw (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: pushw $4095 # imm = 0xFFF
-; GENERIC-NEXT: # sched: [1:1.00]
-; GENERIC-NEXT: pushw $7 # sched: [1:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pop_push_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: popw %ax # sched: [2:1.00]
-; ATOM-NEXT: popw (%rsi) # sched: [3:1.50]
-; ATOM-NEXT: pushw %di # sched: [1:1.00]
-; ATOM-NEXT: pushw (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: pushw $4095 # imm = 0xFFF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: pushw $7 # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pop_push_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: popw %ax # sched: [3:1.00]
-; SLM-NEXT: popw (%rsi) # sched: [4:2.00]
-; SLM-NEXT: pushw %di # sched: [1:1.00]
-; SLM-NEXT: pushw (%rsi) # sched: [4:2.00]
-; SLM-NEXT: pushw $4095 # imm = 0xFFF
-; SLM-NEXT: # sched: [1:1.00]
-; SLM-NEXT: pushw $7 # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pop_push_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: popw %ax # sched: [6:0.50]
-; SANDY-NEXT: popw (%rsi) # sched: [6:0.50]
-; SANDY-NEXT: pushw %di # sched: [5:1.00]
-; SANDY-NEXT: pushw (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: pushw $4095 # imm = 0xFFF
-; SANDY-NEXT: # sched: [1:1.00]
-; SANDY-NEXT: pushw $7 # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pop_push_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: popw %ax # sched: [6:0.50]
-; HASWELL-NEXT: popw (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: pushw %di # sched: [2:1.00]
-; HASWELL-NEXT: pushw (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: pushw $4095 # imm = 0xFFF
-; HASWELL-NEXT: # sched: [1:1.00]
-; HASWELL-NEXT: pushw $7 # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pop_push_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: popw %ax # sched: [6:0.50]
-; BROADWELL-NEXT: popw (%rsi) # sched: [6:1.00]
-; BROADWELL-NEXT: pushw %di # sched: [2:1.00]
-; BROADWELL-NEXT: pushw (%rsi) # sched: [6:1.00]
-; BROADWELL-NEXT: pushw $4095 # imm = 0xFFF
-; BROADWELL-NEXT: # sched: [1:1.00]
-; BROADWELL-NEXT: pushw $7 # sched: [1:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pop_push_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: popw %ax # sched: [6:0.50]
-; SKYLAKE-NEXT: popw (%rsi) # sched: [6:1.00]
-; SKYLAKE-NEXT: pushw %di # sched: [2:1.00]
-; SKYLAKE-NEXT: pushw (%rsi) # sched: [6:1.00]
-; SKYLAKE-NEXT: pushw $4095 # imm = 0xFFF
-; SKYLAKE-NEXT: # sched: [1:1.00]
-; SKYLAKE-NEXT: pushw $7 # sched: [1:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pop_push_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: popw %ax # sched: [6:0.50]
-; SKX-NEXT: popw (%rsi) # sched: [6:1.00]
-; SKX-NEXT: pushw %di # sched: [2:1.00]
-; SKX-NEXT: pushw (%rsi) # sched: [6:1.00]
-; SKX-NEXT: pushw $4095 # imm = 0xFFF
-; SKX-NEXT: # sched: [1:1.00]
-; SKX-NEXT: pushw $7 # sched: [1:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pop_push_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: popw %ax # sched: [5:0.50]
-; BDVER2-NEXT: popw (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: pushw %di # sched: [1:1.00]
-; BDVER2-NEXT: pushw (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: pushw $4095 # imm = 0xFFF
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: pushw $7 # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pop_push_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: popw %ax # sched: [3:1.00]
-; BTVER2-NEXT: popw (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: pushw %di # sched: [1:1.00]
-; BTVER2-NEXT: pushw (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: pushw $4095 # imm = 0xFFF
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: pushw $7 # sched: [1:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pop_push_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: popw %ax # sched: [8:0.50]
-; ZNVER1-NEXT: popw (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: pushw %di # sched: [1:0.50]
-; ZNVER1-NEXT: pushw (%rsi) # sched: [4:0.50]
-; ZNVER1-NEXT: pushw $4095 # imm = 0xFFF
-; ZNVER1-NEXT: # sched: [1:0.50]
-; ZNVER1-NEXT: pushw $7 # sched: [1:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i16 asm sideeffect "popw $0 \0A\09 popw $2 \0A\09 pushw $1 \0A\09 pushw $2 \0A\09 pushw $3 \0A\09 pushw $4", "=r,r,*m,i,i"(i16 %a0, i16 *%a1, i16 4095, i8 7)
- ret i16 %1
-}
-define i64 @test_pop_push_64(i64 %a0, i64 *%a1) optsize {
-; GENERIC-LABEL: test_pop_push_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: popq %rax # sched: [6:0.50]
-; GENERIC-NEXT: popq (%rsi) # sched: [6:0.50]
-; GENERIC-NEXT: pushq %rdi # sched: [5:1.00]
-; GENERIC-NEXT: pushq (%rsi) # sched: [5:1.00]
-; GENERIC-NEXT: pushq $4095 # imm = 0xFFF
-; GENERIC-NEXT: # sched: [1:1.00]
-; GENERIC-NEXT: pushq $7 # sched: [5:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pop_push_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: popq %rax # sched: [1:1.00]
-; ATOM-NEXT: popq (%rsi) # sched: [3:1.50]
-; ATOM-NEXT: pushq %rdi # sched: [1:1.00]
-; ATOM-NEXT: pushq (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: pushq $4095 # imm = 0xFFF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: pushq $7 # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pop_push_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: popq %rax # sched: [3:1.00]
-; SLM-NEXT: popq (%rsi) # sched: [4:2.00]
-; SLM-NEXT: pushq %rdi # sched: [1:1.00]
-; SLM-NEXT: pushq (%rsi) # sched: [4:2.00]
-; SLM-NEXT: pushq $4095 # imm = 0xFFF
-; SLM-NEXT: # sched: [1:1.00]
-; SLM-NEXT: pushq $7 # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_pop_push_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: popq %rax # sched: [6:0.50]
-; SANDY-NEXT: popq (%rsi) # sched: [6:0.50]
-; SANDY-NEXT: pushq %rdi # sched: [5:1.00]
-; SANDY-NEXT: pushq (%rsi) # sched: [5:1.00]
-; SANDY-NEXT: pushq $4095 # imm = 0xFFF
-; SANDY-NEXT: # sched: [1:1.00]
-; SANDY-NEXT: pushq $7 # sched: [5:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_pop_push_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: popq %rax # sched: [6:0.50]
-; HASWELL-NEXT: popq (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: pushq %rdi # sched: [2:1.00]
-; HASWELL-NEXT: pushq (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: pushq $4095 # imm = 0xFFF
-; HASWELL-NEXT: # sched: [1:1.00]
-; HASWELL-NEXT: pushq $7 # sched: [2:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pop_push_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: popq %rax # sched: [6:0.50]
-; BROADWELL-NEXT: popq (%rsi) # sched: [6:1.00]
-; BROADWELL-NEXT: pushq %rdi # sched: [2:1.00]
-; BROADWELL-NEXT: pushq (%rsi) # sched: [6:1.00]
-; BROADWELL-NEXT: pushq $4095 # imm = 0xFFF
-; BROADWELL-NEXT: # sched: [1:1.00]
-; BROADWELL-NEXT: pushq $7 # sched: [2:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pop_push_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: popq %rax # sched: [6:0.50]
-; SKYLAKE-NEXT: popq (%rsi) # sched: [6:1.00]
-; SKYLAKE-NEXT: pushq %rdi # sched: [2:1.00]
-; SKYLAKE-NEXT: pushq (%rsi) # sched: [6:1.00]
-; SKYLAKE-NEXT: pushq $4095 # imm = 0xFFF
-; SKYLAKE-NEXT: # sched: [1:1.00]
-; SKYLAKE-NEXT: pushq $7 # sched: [2:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pop_push_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: popq %rax # sched: [6:0.50]
-; SKX-NEXT: popq (%rsi) # sched: [6:1.00]
-; SKX-NEXT: pushq %rdi # sched: [2:1.00]
-; SKX-NEXT: pushq (%rsi) # sched: [6:1.00]
-; SKX-NEXT: pushq $4095 # imm = 0xFFF
-; SKX-NEXT: # sched: [1:1.00]
-; SKX-NEXT: pushq $7 # sched: [2:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_pop_push_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: popq %rax # sched: [5:0.50]
-; BDVER2-NEXT: popq (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: pushq %rdi # sched: [1:1.00]
-; BDVER2-NEXT: pushq (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: pushq $4095 # imm = 0xFFF
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: pushq $7 # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_pop_push_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: popq %rax # sched: [3:1.00]
-; BTVER2-NEXT: popq (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: pushq %rdi # sched: [1:1.00]
-; BTVER2-NEXT: pushq (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: pushq $4095 # imm = 0xFFF
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: pushq $7 # sched: [1:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_pop_push_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: popq %rax # sched: [8:0.50]
-; ZNVER1-NEXT: popq (%rsi) # sched: [9:1.00]
-; ZNVER1-NEXT: pushq %rdi # sched: [1:0.50]
-; ZNVER1-NEXT: pushq (%rsi) # sched: [9:1.00]
-; ZNVER1-NEXT: pushq $4095 # imm = 0xFFF
-; ZNVER1-NEXT: # sched: [1:0.50]
-; ZNVER1-NEXT: pushq $7 # sched: [1:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i64 asm sideeffect "popq $0 \0A\09 popq $2 \0A\09 pushq $1 \0A\09 pushq $2 \0A\09 pushq $3 \0A\09 pushq $4", "=r,r,*m,i,i"(i64 %a0, i64 *%a1, i64 4095, i8 7)
- ret i64 %1
-}
-
-define void @test_popf_pushf() optsize {
-; GENERIC-LABEL: test_popf_pushf:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: popfq # sched: [5:0.50]
-; GENERIC-NEXT: pushfq # sched: [5:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_popf_pushf:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: popfq # sched: [26:13.00]
-; ATOM-NEXT: pushfq # sched: [9:4.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_popf_pushf:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: popfq # sched: [3:1.00]
-; SLM-NEXT: pushfq # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_popf_pushf:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: popfq # sched: [5:0.50]
-; SANDY-NEXT: pushfq # sched: [5:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_popf_pushf:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: popfq # sched: [5:0.50]
-; HASWELL-NEXT: pushfq # sched: [5:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_popf_pushf:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: popfq # sched: [22:4.25]
-; BROADWELL-NEXT: pushfq # sched: [5:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_popf_pushf:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: popfq # sched: [5:0.50]
-; SKYLAKE-NEXT: pushfq # sched: [5:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_popf_pushf:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: popfq # sched: [5:0.50]
-; SKX-NEXT: pushfq # sched: [5:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_popf_pushf:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: popfq # sched: [5:0.50]
-; BDVER2-NEXT: pushfq # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_popf_pushf:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: popfq # sched: [3:1.00]
-; BTVER2-NEXT: pushfq # sched: [1:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_popf_pushf:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: popfq # sched: [8:0.50]
-; ZNVER1-NEXT: pushfq # sched: [1:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "popf \0A\09 pushf", ""()
- ret void
-}
-
-define void @test_rcl_rcr_8(i8 %a0, i8 %a1, i8 *%a2) optsize {
-; GENERIC-LABEL: test_rcl_rcr_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: rclb %dil # sched: [2:1.50]
-; GENERIC-NEXT: rcrb %dil # sched: [2:1.50]
-; GENERIC-NEXT: rclb (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rcrb (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rclb $7, %dil # sched: [5:4.00]
-; GENERIC-NEXT: rcrb $7, %dil # sched: [5:4.00]
-; GENERIC-NEXT: rclb $7, (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rcrb $7, (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rclb %cl, %dil # sched: [5:4.00]
-; GENERIC-NEXT: rcrb %cl, %dil # sched: [5:4.00]
-; GENERIC-NEXT: rclb %cl, (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rcrb %cl, (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_rcl_rcr_8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: rclb %dil # sched: [1:1.00]
-; ATOM-NEXT: rcrb %dil # sched: [1:1.00]
-; ATOM-NEXT: rclb (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rcrb (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rclb $7, %dil # sched: [1:1.00]
-; ATOM-NEXT: rcrb $7, %dil # sched: [1:1.00]
-; ATOM-NEXT: rclb $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rcrb $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rclb %cl, %dil # sched: [1:1.00]
-; ATOM-NEXT: rcrb %cl, %dil # sched: [1:1.00]
-; ATOM-NEXT: rclb %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rcrb %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_rcl_rcr_8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: rclb %dil # sched: [1:1.00]
-; SLM-NEXT: rcrb %dil # sched: [1:1.00]
-; SLM-NEXT: rclb (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rcrb (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rclb $7, %dil # sched: [1:1.00]
-; SLM-NEXT: rcrb $7, %dil # sched: [1:1.00]
-; SLM-NEXT: rclb $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rcrb $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rclb %cl, %dil # sched: [1:1.00]
-; SLM-NEXT: rcrb %cl, %dil # sched: [1:1.00]
-; SLM-NEXT: rclb %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rcrb %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_rcl_rcr_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: rclb %dil # sched: [2:1.50]
-; SANDY-NEXT: rcrb %dil # sched: [2:1.50]
-; SANDY-NEXT: rclb (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rcrb (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rclb $7, %dil # sched: [5:4.00]
-; SANDY-NEXT: rcrb $7, %dil # sched: [5:4.00]
-; SANDY-NEXT: rclb $7, (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rcrb $7, (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rclb %cl, %dil # sched: [5:4.00]
-; SANDY-NEXT: rcrb %cl, %dil # sched: [5:4.00]
-; SANDY-NEXT: rclb %cl, (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rcrb %cl, (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rcl_rcr_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: rclb %dil # sched: [3:0.75]
-; HASWELL-NEXT: rcrb %dil # sched: [3:0.75]
-; HASWELL-NEXT: rclb (%rdx) # sched: [9:0.75]
-; HASWELL-NEXT: rcrb (%rdx) # sched: [9:0.75]
-; HASWELL-NEXT: rclb $7, %dil # sched: [3:0.75]
-; HASWELL-NEXT: rcrb $7, %dil # sched: [3:0.75]
-; HASWELL-NEXT: rclb $7, (%rdx) # sched: [9:0.75]
-; HASWELL-NEXT: rcrb $7, (%rdx) # sched: [9:0.75]
-; HASWELL-NEXT: rclb %cl, %dil # sched: [11:2.25]
-; HASWELL-NEXT: rcrb %cl, %dil # sched: [14:2.50]
-; HASWELL-NEXT: rclb %cl, (%rdx) # sched: [16:2.00]
-; HASWELL-NEXT: rcrb %cl, (%rdx) # sched: [19:2.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rcl_rcr_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: rclb %dil # sched: [3:0.75]
-; BROADWELL-NEXT: rcrb %dil # sched: [3:0.75]
-; BROADWELL-NEXT: rclb (%rdx) # sched: [8:0.75]
-; BROADWELL-NEXT: rcrb (%rdx) # sched: [8:0.75]
-; BROADWELL-NEXT: rclb $7, %dil # sched: [3:0.75]
-; BROADWELL-NEXT: rcrb $7, %dil # sched: [3:0.75]
-; BROADWELL-NEXT: rclb $7, (%rdx) # sched: [8:0.75]
-; BROADWELL-NEXT: rcrb $7, (%rdx) # sched: [8:0.75]
-; BROADWELL-NEXT: rclb %cl, %dil # sched: [11:2.25]
-; BROADWELL-NEXT: rcrb %cl, %dil # sched: [14:2.50]
-; BROADWELL-NEXT: rclb %cl, (%rdx) # sched: [15:2.00]
-; BROADWELL-NEXT: rcrb %cl, (%rdx) # sched: [18:2.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rcl_rcr_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: rclb %dil # sched: [3:0.75]
-; SKYLAKE-NEXT: rcrb %dil # sched: [3:0.75]
-; SKYLAKE-NEXT: rclb (%rdx) # sched: [8:0.75]
-; SKYLAKE-NEXT: rcrb (%rdx) # sched: [8:0.75]
-; SKYLAKE-NEXT: rclb $7, %dil # sched: [3:0.75]
-; SKYLAKE-NEXT: rcrb $7, %dil # sched: [3:0.75]
-; SKYLAKE-NEXT: rclb $7, (%rdx) # sched: [8:0.75]
-; SKYLAKE-NEXT: rcrb $7, (%rdx) # sched: [8:0.75]
-; SKYLAKE-NEXT: rclb %cl, %dil # sched: [11:2.50]
-; SKYLAKE-NEXT: rcrb %cl, %dil # sched: [14:2.50]
-; SKYLAKE-NEXT: rclb %cl, (%rdx) # sched: [15:2.50]
-; SKYLAKE-NEXT: rcrb %cl, (%rdx) # sched: [18:2.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rcl_rcr_8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: rclb %dil # sched: [3:0.75]
-; SKX-NEXT: rcrb %dil # sched: [3:0.75]
-; SKX-NEXT: rclb (%rdx) # sched: [8:0.75]
-; SKX-NEXT: rcrb (%rdx) # sched: [8:0.75]
-; SKX-NEXT: rclb $7, %dil # sched: [3:0.75]
-; SKX-NEXT: rcrb $7, %dil # sched: [3:0.75]
-; SKX-NEXT: rclb $7, (%rdx) # sched: [8:0.75]
-; SKX-NEXT: rcrb $7, (%rdx) # sched: [8:0.75]
-; SKX-NEXT: rclb %cl, %dil # sched: [11:2.50]
-; SKX-NEXT: rcrb %cl, %dil # sched: [14:2.50]
-; SKX-NEXT: rclb %cl, (%rdx) # sched: [15:2.50]
-; SKX-NEXT: rcrb %cl, (%rdx) # sched: [18:2.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_rcl_rcr_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: rclb %dil # sched: [1:0.50]
-; BDVER2-NEXT: rcrb %dil # sched: [1:0.50]
-; BDVER2-NEXT: rclb (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rcrb (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rclb $7, %dil # sched: [13:0.50]
-; BDVER2-NEXT: rcrb $7, %dil # sched: [12:0.50]
-; BDVER2-NEXT: rclb $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rcrb $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rclb %cl, %dil # sched: [12:0.50]
-; BDVER2-NEXT: rcrb %cl, %dil # sched: [11:0.50]
-; BDVER2-NEXT: rclb %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rcrb %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_rcl_rcr_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: rclb %dil # sched: [1:0.50]
-; BTVER2-NEXT: rcrb %dil # sched: [1:0.50]
-; BTVER2-NEXT: rclb (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rcrb (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rclb $7, %dil # sched: [1:0.50]
-; BTVER2-NEXT: rcrb $7, %dil # sched: [1:0.50]
-; BTVER2-NEXT: rclb $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rcrb $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rclb %cl, %dil # sched: [1:0.50]
-; BTVER2-NEXT: rcrb %cl, %dil # sched: [1:0.50]
-; BTVER2-NEXT: rclb %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rcrb %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_rcl_rcr_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: rclb %dil # sched: [1:0.25]
-; ZNVER1-NEXT: rcrb %dil # sched: [1:0.25]
-; ZNVER1-NEXT: rclb (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rcrb (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rclb $7, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: rcrb $7, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: rclb $7, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rcrb $7, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rclb %cl, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: rcrb %cl, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: rclb %cl, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rcrb %cl, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "rclb $0 \0A\09 rcrb $0 \0A\09 rclb $2 \0A\09 rcrb $2 \0A\09 rclb $3, $0 \0A\09 rcrb $3, $0 \0A\09 rclb $3, $2 \0A\09 rcrb $3, $2 \0A\09 rclb %CL, $0 \0A\09 rcrb %CL, $0 \0A\09 rclb %CL, $2 \0A\09 rcrb %CL, $2", "r,r,*m,i"(i8 %a0, i8 %a1, i8 *%a2, i8 7)
- ret void
-}
-define void @test_rcl_rcr_16(i16 %a0, i16 %a1, i16 *%a2) optsize {
-; GENERIC-LABEL: test_rcl_rcr_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: rclw %di # sched: [2:1.50]
-; GENERIC-NEXT: rcrw %di # sched: [2:1.50]
-; GENERIC-NEXT: rclw (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rcrw (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rclw $7, %di # sched: [5:4.00]
-; GENERIC-NEXT: rcrw $7, %di # sched: [5:4.00]
-; GENERIC-NEXT: rclw $7, (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rcrw $7, (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rclw %cl, %di # sched: [5:4.00]
-; GENERIC-NEXT: rcrw %cl, %di # sched: [5:4.00]
-; GENERIC-NEXT: rclw %cl, (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rcrw %cl, (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_rcl_rcr_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: rclw %di # sched: [1:1.00]
-; ATOM-NEXT: rcrw %di # sched: [1:1.00]
-; ATOM-NEXT: rclw (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rcrw (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rclw $7, %di # sched: [1:1.00]
-; ATOM-NEXT: rcrw $7, %di # sched: [1:1.00]
-; ATOM-NEXT: rclw $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rcrw $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rclw %cl, %di # sched: [1:1.00]
-; ATOM-NEXT: rcrw %cl, %di # sched: [1:1.00]
-; ATOM-NEXT: rclw %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rcrw %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_rcl_rcr_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: rclw %di # sched: [1:1.00]
-; SLM-NEXT: rcrw %di # sched: [1:1.00]
-; SLM-NEXT: rclw (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rcrw (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rclw $7, %di # sched: [1:1.00]
-; SLM-NEXT: rcrw $7, %di # sched: [1:1.00]
-; SLM-NEXT: rclw $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rcrw $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rclw %cl, %di # sched: [1:1.00]
-; SLM-NEXT: rcrw %cl, %di # sched: [1:1.00]
-; SLM-NEXT: rclw %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rcrw %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_rcl_rcr_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: rclw %di # sched: [2:1.50]
-; SANDY-NEXT: rcrw %di # sched: [2:1.50]
-; SANDY-NEXT: rclw (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rcrw (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rclw $7, %di # sched: [5:4.00]
-; SANDY-NEXT: rcrw $7, %di # sched: [5:4.00]
-; SANDY-NEXT: rclw $7, (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rcrw $7, (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rclw %cl, %di # sched: [5:4.00]
-; SANDY-NEXT: rcrw %cl, %di # sched: [5:4.00]
-; SANDY-NEXT: rclw %cl, (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rcrw %cl, (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rcl_rcr_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: rclw %di # sched: [3:0.75]
-; HASWELL-NEXT: rcrw %di # sched: [3:0.75]
-; HASWELL-NEXT: rclw (%rdx) # sched: [9:0.75]
-; HASWELL-NEXT: rcrw (%rdx) # sched: [9:0.75]
-; HASWELL-NEXT: rclw $7, %di # sched: [3:0.75]
-; HASWELL-NEXT: rcrw $7, %di # sched: [3:0.75]
-; HASWELL-NEXT: rclw $7, (%rdx) # sched: [9:0.75]
-; HASWELL-NEXT: rcrw $7, (%rdx) # sched: [9:0.75]
-; HASWELL-NEXT: rclw %cl, %di # sched: [11:2.00]
-; HASWELL-NEXT: rcrw %cl, %di # sched: [11:2.00]
-; HASWELL-NEXT: rclw %cl, (%rdx) # sched: [16:2.00]
-; HASWELL-NEXT: rcrw %cl, (%rdx) # sched: [19:2.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rcl_rcr_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: rclw %di # sched: [3:0.75]
-; BROADWELL-NEXT: rcrw %di # sched: [3:0.75]
-; BROADWELL-NEXT: rclw (%rdx) # sched: [8:0.75]
-; BROADWELL-NEXT: rcrw (%rdx) # sched: [8:0.75]
-; BROADWELL-NEXT: rclw $7, %di # sched: [3:0.75]
-; BROADWELL-NEXT: rcrw $7, %di # sched: [3:0.75]
-; BROADWELL-NEXT: rclw $7, (%rdx) # sched: [8:0.75]
-; BROADWELL-NEXT: rcrw $7, (%rdx) # sched: [8:0.75]
-; BROADWELL-NEXT: rclw %cl, %di # sched: [11:2.00]
-; BROADWELL-NEXT: rcrw %cl, %di # sched: [11:2.00]
-; BROADWELL-NEXT: rclw %cl, (%rdx) # sched: [15:2.00]
-; BROADWELL-NEXT: rcrw %cl, (%rdx) # sched: [18:2.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rcl_rcr_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: rclw %di # sched: [3:0.75]
-; SKYLAKE-NEXT: rcrw %di # sched: [3:0.75]
-; SKYLAKE-NEXT: rclw (%rdx) # sched: [8:0.75]
-; SKYLAKE-NEXT: rcrw (%rdx) # sched: [8:0.75]
-; SKYLAKE-NEXT: rclw $7, %di # sched: [3:0.75]
-; SKYLAKE-NEXT: rcrw $7, %di # sched: [3:0.75]
-; SKYLAKE-NEXT: rclw $7, (%rdx) # sched: [8:0.75]
-; SKYLAKE-NEXT: rcrw $7, (%rdx) # sched: [8:0.75]
-; SKYLAKE-NEXT: rclw %cl, %di # sched: [11:2.00]
-; SKYLAKE-NEXT: rcrw %cl, %di # sched: [11:2.00]
-; SKYLAKE-NEXT: rclw %cl, (%rdx) # sched: [15:2.50]
-; SKYLAKE-NEXT: rcrw %cl, (%rdx) # sched: [18:2.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rcl_rcr_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: rclw %di # sched: [3:0.75]
-; SKX-NEXT: rcrw %di # sched: [3:0.75]
-; SKX-NEXT: rclw (%rdx) # sched: [8:0.75]
-; SKX-NEXT: rcrw (%rdx) # sched: [8:0.75]
-; SKX-NEXT: rclw $7, %di # sched: [3:0.75]
-; SKX-NEXT: rcrw $7, %di # sched: [3:0.75]
-; SKX-NEXT: rclw $7, (%rdx) # sched: [8:0.75]
-; SKX-NEXT: rcrw $7, (%rdx) # sched: [8:0.75]
-; SKX-NEXT: rclw %cl, %di # sched: [11:2.00]
-; SKX-NEXT: rcrw %cl, %di # sched: [11:2.00]
-; SKX-NEXT: rclw %cl, (%rdx) # sched: [15:2.50]
-; SKX-NEXT: rcrw %cl, (%rdx) # sched: [18:2.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_rcl_rcr_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: rclw %di # sched: [1:0.50]
-; BDVER2-NEXT: rcrw %di # sched: [1:0.50]
-; BDVER2-NEXT: rclw (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rcrw (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rclw $7, %di # sched: [11:0.50]
-; BDVER2-NEXT: rcrw $7, %di # sched: [10:0.50]
-; BDVER2-NEXT: rclw $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rcrw $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rclw %cl, %di # sched: [10:0.50]
-; BDVER2-NEXT: rcrw %cl, %di # sched: [9:0.50]
-; BDVER2-NEXT: rclw %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rcrw %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_rcl_rcr_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: rclw %di # sched: [1:0.50]
-; BTVER2-NEXT: rcrw %di # sched: [1:0.50]
-; BTVER2-NEXT: rclw (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rcrw (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rclw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: rcrw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: rclw $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rcrw $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rclw %cl, %di # sched: [1:0.50]
-; BTVER2-NEXT: rcrw %cl, %di # sched: [1:0.50]
-; BTVER2-NEXT: rclw %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rcrw %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_rcl_rcr_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: rclw %di # sched: [1:0.25]
-; ZNVER1-NEXT: rcrw %di # sched: [1:0.25]
-; ZNVER1-NEXT: rclw (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rcrw (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rclw $7, %di # sched: [1:0.25]
-; ZNVER1-NEXT: rcrw $7, %di # sched: [1:0.25]
-; ZNVER1-NEXT: rclw $7, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rcrw $7, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rclw %cl, %di # sched: [1:0.25]
-; ZNVER1-NEXT: rcrw %cl, %di # sched: [1:0.25]
-; ZNVER1-NEXT: rclw %cl, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rcrw %cl, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "rclw $0 \0A\09 rcrw $0 \0A\09 rclw $2 \0A\09 rcrw $2 \0A\09 rclw $3, $0 \0A\09 rcrw $3, $0 \0A\09 rclw $3, $2 \0A\09 rcrw $3, $2 \0A\09 rclw %CL, $0 \0A\09 rcrw %CL, $0 \0A\09 rclw %CL, $2 \0A\09 rcrw %CL, $2", "r,r,*m,i"(i16 %a0, i16 %a1, i16 *%a2, i8 7)
- ret void
-}
-define void @test_rcl_rcr_32(i32 %a0, i32 %a1, i32 *%a2) optsize {
-; GENERIC-LABEL: test_rcl_rcr_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: rcll %edi # sched: [2:1.50]
-; GENERIC-NEXT: rcrl %edi # sched: [2:1.50]
-; GENERIC-NEXT: rcll (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rcrl (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rcll $7, %edi # sched: [5:4.00]
-; GENERIC-NEXT: rcrl $7, %edi # sched: [5:4.00]
-; GENERIC-NEXT: rcll $7, (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rcrl $7, (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rcll %cl, %edi # sched: [5:4.00]
-; GENERIC-NEXT: rcrl %cl, %edi # sched: [5:4.00]
-; GENERIC-NEXT: rcll %cl, (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rcrl %cl, (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_rcl_rcr_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: rcll %edi # sched: [1:1.00]
-; ATOM-NEXT: rcrl %edi # sched: [1:1.00]
-; ATOM-NEXT: rcll (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rcrl (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rcll $7, %edi # sched: [1:1.00]
-; ATOM-NEXT: rcrl $7, %edi # sched: [1:1.00]
-; ATOM-NEXT: rcll $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rcrl $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rcll %cl, %edi # sched: [1:1.00]
-; ATOM-NEXT: rcrl %cl, %edi # sched: [1:1.00]
-; ATOM-NEXT: rcll %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rcrl %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_rcl_rcr_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: rcll %edi # sched: [1:1.00]
-; SLM-NEXT: rcrl %edi # sched: [1:1.00]
-; SLM-NEXT: rcll (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rcrl (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rcll $7, %edi # sched: [1:1.00]
-; SLM-NEXT: rcrl $7, %edi # sched: [1:1.00]
-; SLM-NEXT: rcll $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rcrl $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rcll %cl, %edi # sched: [1:1.00]
-; SLM-NEXT: rcrl %cl, %edi # sched: [1:1.00]
-; SLM-NEXT: rcll %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rcrl %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_rcl_rcr_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: rcll %edi # sched: [2:1.50]
-; SANDY-NEXT: rcrl %edi # sched: [2:1.50]
-; SANDY-NEXT: rcll (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rcrl (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rcll $7, %edi # sched: [5:4.00]
-; SANDY-NEXT: rcrl $7, %edi # sched: [5:4.00]
-; SANDY-NEXT: rcll $7, (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rcrl $7, (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rcll %cl, %edi # sched: [5:4.00]
-; SANDY-NEXT: rcrl %cl, %edi # sched: [5:4.00]
-; SANDY-NEXT: rcll %cl, (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rcrl %cl, (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rcl_rcr_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: rcll %edi # sched: [3:0.75]
-; HASWELL-NEXT: rcrl %edi # sched: [3:0.75]
-; HASWELL-NEXT: rcll (%rdx) # sched: [9:0.75]
-; HASWELL-NEXT: rcrl (%rdx) # sched: [9:0.75]
-; HASWELL-NEXT: rcll $7, %edi # sched: [3:0.75]
-; HASWELL-NEXT: rcrl $7, %edi # sched: [3:0.75]
-; HASWELL-NEXT: rcll $7, (%rdx) # sched: [9:0.75]
-; HASWELL-NEXT: rcrl $7, (%rdx) # sched: [9:0.75]
-; HASWELL-NEXT: rcll %cl, %edi # sched: [11:2.00]
-; HASWELL-NEXT: rcrl %cl, %edi # sched: [11:2.00]
-; HASWELL-NEXT: rcll %cl, (%rdx) # sched: [16:2.00]
-; HASWELL-NEXT: rcrl %cl, (%rdx) # sched: [19:2.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rcl_rcr_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: rcll %edi # sched: [3:0.75]
-; BROADWELL-NEXT: rcrl %edi # sched: [3:0.75]
-; BROADWELL-NEXT: rcll (%rdx) # sched: [8:0.75]
-; BROADWELL-NEXT: rcrl (%rdx) # sched: [8:0.75]
-; BROADWELL-NEXT: rcll $7, %edi # sched: [3:0.75]
-; BROADWELL-NEXT: rcrl $7, %edi # sched: [3:0.75]
-; BROADWELL-NEXT: rcll $7, (%rdx) # sched: [8:0.75]
-; BROADWELL-NEXT: rcrl $7, (%rdx) # sched: [8:0.75]
-; BROADWELL-NEXT: rcll %cl, %edi # sched: [11:2.00]
-; BROADWELL-NEXT: rcrl %cl, %edi # sched: [11:2.00]
-; BROADWELL-NEXT: rcll %cl, (%rdx) # sched: [15:2.00]
-; BROADWELL-NEXT: rcrl %cl, (%rdx) # sched: [18:2.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rcl_rcr_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: rcll %edi # sched: [3:0.75]
-; SKYLAKE-NEXT: rcrl %edi # sched: [3:0.75]
-; SKYLAKE-NEXT: rcll (%rdx) # sched: [8:0.75]
-; SKYLAKE-NEXT: rcrl (%rdx) # sched: [8:0.75]
-; SKYLAKE-NEXT: rcll $7, %edi # sched: [3:0.75]
-; SKYLAKE-NEXT: rcrl $7, %edi # sched: [3:0.75]
-; SKYLAKE-NEXT: rcll $7, (%rdx) # sched: [8:0.75]
-; SKYLAKE-NEXT: rcrl $7, (%rdx) # sched: [8:0.75]
-; SKYLAKE-NEXT: rcll %cl, %edi # sched: [11:2.00]
-; SKYLAKE-NEXT: rcrl %cl, %edi # sched: [11:2.00]
-; SKYLAKE-NEXT: rcll %cl, (%rdx) # sched: [15:2.50]
-; SKYLAKE-NEXT: rcrl %cl, (%rdx) # sched: [18:2.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rcl_rcr_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: rcll %edi # sched: [3:0.75]
-; SKX-NEXT: rcrl %edi # sched: [3:0.75]
-; SKX-NEXT: rcll (%rdx) # sched: [8:0.75]
-; SKX-NEXT: rcrl (%rdx) # sched: [8:0.75]
-; SKX-NEXT: rcll $7, %edi # sched: [3:0.75]
-; SKX-NEXT: rcrl $7, %edi # sched: [3:0.75]
-; SKX-NEXT: rcll $7, (%rdx) # sched: [8:0.75]
-; SKX-NEXT: rcrl $7, (%rdx) # sched: [8:0.75]
-; SKX-NEXT: rcll %cl, %edi # sched: [11:2.00]
-; SKX-NEXT: rcrl %cl, %edi # sched: [11:2.00]
-; SKX-NEXT: rcll %cl, (%rdx) # sched: [15:2.50]
-; SKX-NEXT: rcrl %cl, (%rdx) # sched: [18:2.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_rcl_rcr_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: rcll %edi # sched: [1:0.50]
-; BDVER2-NEXT: rcrl %edi # sched: [1:0.50]
-; BDVER2-NEXT: rcll (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rcrl (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rcll $7, %edi # sched: [8:0.50]
-; BDVER2-NEXT: rcrl $7, %edi # sched: [7:0.50]
-; BDVER2-NEXT: rcll $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rcrl $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rcll %cl, %edi # sched: [7:0.50]
-; BDVER2-NEXT: rcrl %cl, %edi # sched: [7:0.50]
-; BDVER2-NEXT: rcll %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rcrl %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_rcl_rcr_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: rcll %edi # sched: [1:0.50]
-; BTVER2-NEXT: rcrl %edi # sched: [1:0.50]
-; BTVER2-NEXT: rcll (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rcrl (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rcll $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: rcrl $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: rcll $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rcrl $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rcll %cl, %edi # sched: [1:0.50]
-; BTVER2-NEXT: rcrl %cl, %edi # sched: [1:0.50]
-; BTVER2-NEXT: rcll %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rcrl %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_rcl_rcr_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: rcll %edi # sched: [1:0.25]
-; ZNVER1-NEXT: rcrl %edi # sched: [1:0.25]
-; ZNVER1-NEXT: rcll (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rcrl (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rcll $7, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: rcrl $7, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: rcll $7, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rcrl $7, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rcll %cl, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: rcrl %cl, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: rcll %cl, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rcrl %cl, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "rcll $0 \0A\09 rcrl $0 \0A\09 rcll $2 \0A\09 rcrl $2 \0A\09 rcll $3, $0 \0A\09 rcrl $3, $0 \0A\09 rcll $3, $2 \0A\09 rcrl $3, $2 \0A\09 rcll %CL, $0 \0A\09 rcrl %CL, $0 \0A\09 rcll %CL, $2 \0A\09 rcrl %CL, $2", "r,r,*m,i"(i32 %a0, i32 %a1, i32 *%a2, i8 7)
- ret void
-}
-define void @test_rcl_rcr_64(i64 %a0, i64 %a1, i64 *%a2) optsize {
-; GENERIC-LABEL: test_rcl_rcr_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: rclq %rdi # sched: [2:1.50]
-; GENERIC-NEXT: rcrq %rdi # sched: [2:1.50]
-; GENERIC-NEXT: rclq (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rcrq (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rclq $7, %rdi # sched: [5:4.00]
-; GENERIC-NEXT: rcrq $7, %rdi # sched: [5:4.00]
-; GENERIC-NEXT: rclq $7, (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rcrq $7, (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rclq %cl, %rdi # sched: [5:4.00]
-; GENERIC-NEXT: rcrq %cl, %rdi # sched: [5:4.00]
-; GENERIC-NEXT: rclq %cl, (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: rcrq %cl, (%rdx) # sched: [11:3.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_rcl_rcr_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: rclq %rdi # sched: [1:1.00]
-; ATOM-NEXT: rcrq %rdi # sched: [1:1.00]
-; ATOM-NEXT: rclq (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rcrq (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rclq $7, %rdi # sched: [1:1.00]
-; ATOM-NEXT: rcrq $7, %rdi # sched: [1:1.00]
-; ATOM-NEXT: rclq $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rcrq $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rclq %cl, %rdi # sched: [1:1.00]
-; ATOM-NEXT: rcrq %cl, %rdi # sched: [1:1.00]
-; ATOM-NEXT: rclq %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rcrq %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_rcl_rcr_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: rclq %rdi # sched: [1:1.00]
-; SLM-NEXT: rcrq %rdi # sched: [1:1.00]
-; SLM-NEXT: rclq (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rcrq (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rclq $7, %rdi # sched: [1:1.00]
-; SLM-NEXT: rcrq $7, %rdi # sched: [1:1.00]
-; SLM-NEXT: rclq $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rcrq $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rclq %cl, %rdi # sched: [1:1.00]
-; SLM-NEXT: rcrq %cl, %rdi # sched: [1:1.00]
-; SLM-NEXT: rclq %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rcrq %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_rcl_rcr_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: rclq %rdi # sched: [2:1.50]
-; SANDY-NEXT: rcrq %rdi # sched: [2:1.50]
-; SANDY-NEXT: rclq (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rcrq (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rclq $7, %rdi # sched: [5:4.00]
-; SANDY-NEXT: rcrq $7, %rdi # sched: [5:4.00]
-; SANDY-NEXT: rclq $7, (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rcrq $7, (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rclq %cl, %rdi # sched: [5:4.00]
-; SANDY-NEXT: rcrq %cl, %rdi # sched: [5:4.00]
-; SANDY-NEXT: rclq %cl, (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: rcrq %cl, (%rdx) # sched: [11:3.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rcl_rcr_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: rclq %rdi # sched: [3:0.75]
-; HASWELL-NEXT: rcrq %rdi # sched: [3:0.75]
-; HASWELL-NEXT: rclq (%rdx) # sched: [9:0.75]
-; HASWELL-NEXT: rcrq (%rdx) # sched: [9:0.75]
-; HASWELL-NEXT: rclq $7, %rdi # sched: [3:0.75]
-; HASWELL-NEXT: rcrq $7, %rdi # sched: [3:0.75]
-; HASWELL-NEXT: rclq $7, (%rdx) # sched: [9:0.75]
-; HASWELL-NEXT: rcrq $7, (%rdx) # sched: [9:0.75]
-; HASWELL-NEXT: rclq %cl, %rdi # sched: [11:2.00]
-; HASWELL-NEXT: rcrq %cl, %rdi # sched: [11:2.00]
-; HASWELL-NEXT: rclq %cl, (%rdx) # sched: [16:2.00]
-; HASWELL-NEXT: rcrq %cl, (%rdx) # sched: [19:2.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rcl_rcr_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: rclq %rdi # sched: [3:0.75]
-; BROADWELL-NEXT: rcrq %rdi # sched: [3:0.75]
-; BROADWELL-NEXT: rclq (%rdx) # sched: [8:0.75]
-; BROADWELL-NEXT: rcrq (%rdx) # sched: [8:0.75]
-; BROADWELL-NEXT: rclq $7, %rdi # sched: [3:0.75]
-; BROADWELL-NEXT: rcrq $7, %rdi # sched: [3:0.75]
-; BROADWELL-NEXT: rclq $7, (%rdx) # sched: [8:0.75]
-; BROADWELL-NEXT: rcrq $7, (%rdx) # sched: [8:0.75]
-; BROADWELL-NEXT: rclq %cl, %rdi # sched: [11:2.00]
-; BROADWELL-NEXT: rcrq %cl, %rdi # sched: [11:2.00]
-; BROADWELL-NEXT: rclq %cl, (%rdx) # sched: [15:2.00]
-; BROADWELL-NEXT: rcrq %cl, (%rdx) # sched: [18:2.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rcl_rcr_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: rclq %rdi # sched: [3:0.75]
-; SKYLAKE-NEXT: rcrq %rdi # sched: [3:0.75]
-; SKYLAKE-NEXT: rclq (%rdx) # sched: [8:0.75]
-; SKYLAKE-NEXT: rcrq (%rdx) # sched: [8:0.75]
-; SKYLAKE-NEXT: rclq $7, %rdi # sched: [3:0.75]
-; SKYLAKE-NEXT: rcrq $7, %rdi # sched: [3:0.75]
-; SKYLAKE-NEXT: rclq $7, (%rdx) # sched: [8:0.75]
-; SKYLAKE-NEXT: rcrq $7, (%rdx) # sched: [8:0.75]
-; SKYLAKE-NEXT: rclq %cl, %rdi # sched: [11:2.00]
-; SKYLAKE-NEXT: rcrq %cl, %rdi # sched: [11:2.00]
-; SKYLAKE-NEXT: rclq %cl, (%rdx) # sched: [15:2.50]
-; SKYLAKE-NEXT: rcrq %cl, (%rdx) # sched: [18:2.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rcl_rcr_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: rclq %rdi # sched: [3:0.75]
-; SKX-NEXT: rcrq %rdi # sched: [3:0.75]
-; SKX-NEXT: rclq (%rdx) # sched: [8:0.75]
-; SKX-NEXT: rcrq (%rdx) # sched: [8:0.75]
-; SKX-NEXT: rclq $7, %rdi # sched: [3:0.75]
-; SKX-NEXT: rcrq $7, %rdi # sched: [3:0.75]
-; SKX-NEXT: rclq $7, (%rdx) # sched: [8:0.75]
-; SKX-NEXT: rcrq $7, (%rdx) # sched: [8:0.75]
-; SKX-NEXT: rclq %cl, %rdi # sched: [11:2.00]
-; SKX-NEXT: rcrq %cl, %rdi # sched: [11:2.00]
-; SKX-NEXT: rclq %cl, (%rdx) # sched: [15:2.50]
-; SKX-NEXT: rcrq %cl, (%rdx) # sched: [18:2.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_rcl_rcr_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: rclq %rdi # sched: [1:0.50]
-; BDVER2-NEXT: rcrq %rdi # sched: [1:0.50]
-; BDVER2-NEXT: rclq (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rcrq (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rclq $7, %rdi # sched: [8:0.50]
-; BDVER2-NEXT: rcrq $7, %rdi # sched: [7:0.50]
-; BDVER2-NEXT: rclq $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rcrq $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rclq %cl, %rdi # sched: [7:0.50]
-; BDVER2-NEXT: rcrq %cl, %rdi # sched: [7:0.50]
-; BDVER2-NEXT: rclq %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rcrq %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_rcl_rcr_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: rclq %rdi # sched: [1:0.50]
-; BTVER2-NEXT: rcrq %rdi # sched: [1:0.50]
-; BTVER2-NEXT: rclq (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rcrq (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rclq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: rcrq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: rclq $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rcrq $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rclq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: rcrq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: rclq %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rcrq %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_rcl_rcr_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: rclq %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: rcrq %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: rclq (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rcrq (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rclq $7, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: rcrq $7, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: rclq $7, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rcrq $7, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rclq %cl, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: rcrq %cl, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: rclq %cl, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: rcrq %cl, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "rclq $0 \0A\09 rcrq $0 \0A\09 rclq $2 \0A\09 rcrq $2 \0A\09 rclq $3, $0 \0A\09 rcrq $3, $0 \0A\09 rclq $3, $2 \0A\09 rcrq $3, $2 \0A\09 rclq %CL, $0 \0A\09 rcrq %CL, $0 \0A\09 rclq %CL, $2 \0A\09 rcrq %CL, $2", "r,r,*m,i"(i64 %a0, i64 %a1, i64 *%a2, i8 7)
- ret void
-}
-
-define void @test_rdmsr_wrmsr() optsize {
-; GENERIC-LABEL: test_rdmsr_wrmsr:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: rdmsr # sched: [100:0.33]
-; GENERIC-NEXT: wrmsr # sched: [100:0.33]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_rdmsr_wrmsr:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: rdmsr # sched: [78:39.00]
-; ATOM-NEXT: wrmsr # sched: [202:101.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_rdmsr_wrmsr:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: rdmsr # sched: [100:1.00]
-; SLM-NEXT: wrmsr # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_rdmsr_wrmsr:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: rdmsr # sched: [100:0.33]
-; SANDY-NEXT: wrmsr # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rdmsr_wrmsr:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: rdmsr # sched: [100:0.25]
-; HASWELL-NEXT: wrmsr # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rdmsr_wrmsr:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: rdmsr # sched: [100:0.25]
-; BROADWELL-NEXT: wrmsr # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rdmsr_wrmsr:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: rdmsr # sched: [100:0.25]
-; SKYLAKE-NEXT: wrmsr # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rdmsr_wrmsr:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: rdmsr # sched: [100:0.25]
-; SKX-NEXT: wrmsr # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_rdmsr_wrmsr:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: rdmsr # sched: [100:0.50]
-; BDVER2-NEXT: wrmsr # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_rdmsr_wrmsr:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: rdmsr # sched: [100:0.50]
-; BTVER2-NEXT: wrmsr # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_rdmsr_wrmsr:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: rdmsr # sched: [100:0.25]
-; ZNVER1-NEXT: wrmsr # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "rdmsr \0A\09 wrmsr", ""()
- ret void
-}
-
-define void @test_rdpmc() optsize {
-; GENERIC-LABEL: test_rdpmc:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: rdpmc # sched: [100:0.33]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_rdpmc:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: rdpmc # sched: [46:23.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_rdpmc:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: rdpmc # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_rdpmc:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: rdpmc # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rdpmc:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: rdpmc # sched: [1:8.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rdpmc:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: rdpmc # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rdpmc:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: rdpmc # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rdpmc:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: rdpmc # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_rdpmc:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: rdpmc # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_rdpmc:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: rdpmc # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_rdpmc:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: rdpmc # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "rdpmc", ""()
- ret void
-}
-
-define void @test_rdtsc_rdtscp() optsize {
-; GENERIC-LABEL: test_rdtsc_rdtscp:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: rdtsc # sched: [100:0.33]
-; GENERIC-NEXT: rdtscp # sched: [100:0.33]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_rdtsc_rdtscp:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: rdtsc # sched: [30:15.00]
-; ATOM-NEXT: rdtscp # sched: [30:15.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_rdtsc_rdtscp:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: rdtsc # sched: [100:1.00]
-; SLM-NEXT: rdtscp # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_rdtsc_rdtscp:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: rdtsc # sched: [100:0.33]
-; SANDY-NEXT: rdtscp # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rdtsc_rdtscp:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: rdtsc # sched: [18:2.00]
-; HASWELL-NEXT: rdtscp # sched: [42:5.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rdtsc_rdtscp:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: rdtsc # sched: [18:2.00]
-; BROADWELL-NEXT: rdtscp # sched: [42:5.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rdtsc_rdtscp:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: rdtsc # sched: [18:2.00]
-; SKYLAKE-NEXT: rdtscp # sched: [42:5.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rdtsc_rdtscp:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: rdtsc # sched: [18:2.00]
-; SKX-NEXT: rdtscp # sched: [42:5.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_rdtsc_rdtscp:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: rdtsc # sched: [100:0.50]
-; BDVER2-NEXT: rdtscp # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_rdtsc_rdtscp:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: rdtsc # sched: [100:0.50]
-; BTVER2-NEXT: rdtscp # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_rdtsc_rdtscp:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: rdtsc # sched: [100:0.25]
-; ZNVER1-NEXT: rdtscp # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "rdtsc \0A\09 rdtscp", ""()
- ret void
-}
-
-define void @test_ret() optsize {
-; GENERIC-LABEL: test_ret:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-; GENERIC-NEXT: retq $4095 # imm = 0xFFF
-; GENERIC-NEXT: # sched: [6:1.00]
-; GENERIC-NEXT: lretl # sched: [6:1.00]
-; GENERIC-NEXT: lretl $4095 # imm = 0xFFF
-; GENERIC-NEXT: # sched: [6:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_ret:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-; ATOM-NEXT: retq $4095 # imm = 0xFFF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: lretl # sched: [79:39.50]
-; ATOM-NEXT: lretl $4095 # imm = 0xFFF
-; ATOM-NEXT: # sched: [79:39.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_ret:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: retq # sched: [4:1.00]
-; SLM-NEXT: retq $4095 # imm = 0xFFF
-; SLM-NEXT: # sched: [4:1.00]
-; SLM-NEXT: lretl # sched: [4:1.00]
-; SLM-NEXT: lretl $4095 # imm = 0xFFF
-; SLM-NEXT: # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_ret:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-; SANDY-NEXT: retq $4095 # imm = 0xFFF
-; SANDY-NEXT: # sched: [6:1.00]
-; SANDY-NEXT: lretl # sched: [6:1.00]
-; SANDY-NEXT: lretl $4095 # imm = 0xFFF
-; SANDY-NEXT: # sched: [6:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_ret:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-; HASWELL-NEXT: retq $4095 # imm = 0xFFF
-; HASWELL-NEXT: # sched: [1:2.00]
-; HASWELL-NEXT: lretl # sched: [6:0.50]
-; HASWELL-NEXT: lretl $4095 # imm = 0xFFF
-; HASWELL-NEXT: # sched: [1:2.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ret:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-; BROADWELL-NEXT: retq $4095 # imm = 0xFFF
-; BROADWELL-NEXT: # sched: [6:0.50]
-; BROADWELL-NEXT: lretl # sched: [6:0.50]
-; BROADWELL-NEXT: lretl $4095 # imm = 0xFFF
-; BROADWELL-NEXT: # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ret:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-; SKYLAKE-NEXT: retq $4095 # imm = 0xFFF
-; SKYLAKE-NEXT: # sched: [6:0.50]
-; SKYLAKE-NEXT: lretl # sched: [6:0.50]
-; SKYLAKE-NEXT: lretl $4095 # imm = 0xFFF
-; SKYLAKE-NEXT: # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_ret:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: retq # sched: [7:1.00]
-; SKX-NEXT: retq $4095 # imm = 0xFFF
-; SKX-NEXT: # sched: [6:0.50]
-; SKX-NEXT: lretl # sched: [6:0.50]
-; SKX-NEXT: lretl $4095 # imm = 0xFFF
-; SKX-NEXT: # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_ret:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-; BDVER2-NEXT: retq $4095 # imm = 0xFFF
-; BDVER2-NEXT: # sched: [5:1.00]
-; BDVER2-NEXT: lretl # sched: [5:1.00]
-; BDVER2-NEXT: lretl $4095 # imm = 0xFFF
-; BDVER2-NEXT: # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_ret:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-; BTVER2-NEXT: retq $4095 # imm = 0xFFF
-; BTVER2-NEXT: # sched: [4:1.00]
-; BTVER2-NEXT: lretl # sched: [4:1.00]
-; BTVER2-NEXT: lretl $4095 # imm = 0xFFF
-; BTVER2-NEXT: # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ret:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
-; ZNVER1-NEXT: retq $4095 # imm = 0xFFF
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: lretl # sched: [1:0.50]
-; ZNVER1-NEXT: lretl $4095 # imm = 0xFFF
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "ret \0A\09 ret $0 \0A\09 lret \0A\09 lret $0", "i"(i16 4095)
- ret void
-}
-
-define void @test_rol_ror_8(i8 %a0, i8 %a1, i8 *%a2) optsize {
-; GENERIC-LABEL: test_rol_ror_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: rolb %dil # sched: [2:1.00]
-; GENERIC-NEXT: rorb %dil # sched: [2:1.00]
-; GENERIC-NEXT: rolb (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: rorb (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: rolb $7, %dil # sched: [2:1.00]
-; GENERIC-NEXT: rorb $7, %dil # sched: [2:1.00]
-; GENERIC-NEXT: rolb $7, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: rorb $7, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: rolb %cl, %dil # sched: [3:1.50]
-; GENERIC-NEXT: rorb %cl, %dil # sched: [3:1.50]
-; GENERIC-NEXT: rolb %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: rorb %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_rol_ror_8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: rolb %dil # sched: [1:1.00]
-; ATOM-NEXT: rorb %dil # sched: [1:1.00]
-; ATOM-NEXT: rolb (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rorb (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rolb $7, %dil # sched: [1:1.00]
-; ATOM-NEXT: rorb $7, %dil # sched: [1:1.00]
-; ATOM-NEXT: rolb $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rorb $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rolb %cl, %dil # sched: [1:1.00]
-; ATOM-NEXT: rorb %cl, %dil # sched: [1:1.00]
-; ATOM-NEXT: rolb %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rorb %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_rol_ror_8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: rolb %dil # sched: [1:1.00]
-; SLM-NEXT: rorb %dil # sched: [1:1.00]
-; SLM-NEXT: rolb (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rorb (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rolb $7, %dil # sched: [1:1.00]
-; SLM-NEXT: rorb $7, %dil # sched: [1:1.00]
-; SLM-NEXT: rolb $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rorb $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rolb %cl, %dil # sched: [1:1.00]
-; SLM-NEXT: rorb %cl, %dil # sched: [1:1.00]
-; SLM-NEXT: rolb %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rorb %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_rol_ror_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: rolb %dil # sched: [2:1.00]
-; SANDY-NEXT: rorb %dil # sched: [2:1.00]
-; SANDY-NEXT: rolb (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: rorb (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: rolb $7, %dil # sched: [2:1.00]
-; SANDY-NEXT: rorb $7, %dil # sched: [2:1.00]
-; SANDY-NEXT: rolb $7, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: rorb $7, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: rolb %cl, %dil # sched: [3:1.50]
-; SANDY-NEXT: rorb %cl, %dil # sched: [3:1.50]
-; SANDY-NEXT: rolb %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: rorb %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rol_ror_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: rolb %dil # sched: [2:1.00]
-; HASWELL-NEXT: rorb %dil # sched: [2:1.00]
-; HASWELL-NEXT: rolb (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: rorb (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: rolb $7, %dil # sched: [2:1.00]
-; HASWELL-NEXT: rorb $7, %dil # sched: [2:1.00]
-; HASWELL-NEXT: rolb $7, (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: rorb $7, (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: rolb %cl, %dil # sched: [3:1.00]
-; HASWELL-NEXT: rorb %cl, %dil # sched: [3:1.00]
-; HASWELL-NEXT: rolb %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: rorb %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rol_ror_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: rolb %dil # sched: [2:1.00]
-; BROADWELL-NEXT: rorb %dil # sched: [2:1.00]
-; BROADWELL-NEXT: rolb (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: rorb (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: rolb $7, %dil # sched: [2:1.00]
-; BROADWELL-NEXT: rorb $7, %dil # sched: [2:1.00]
-; BROADWELL-NEXT: rolb $7, (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: rorb $7, (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: rolb %cl, %dil # sched: [3:1.00]
-; BROADWELL-NEXT: rorb %cl, %dil # sched: [3:1.00]
-; BROADWELL-NEXT: rolb %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: rorb %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rol_ror_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: rolb %dil # sched: [2:1.00]
-; SKYLAKE-NEXT: rorb %dil # sched: [2:1.00]
-; SKYLAKE-NEXT: rolb (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: rorb (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: rolb $7, %dil # sched: [2:1.00]
-; SKYLAKE-NEXT: rorb $7, %dil # sched: [2:1.00]
-; SKYLAKE-NEXT: rolb $7, (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: rorb $7, (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: rolb %cl, %dil # sched: [3:1.50]
-; SKYLAKE-NEXT: rorb %cl, %dil # sched: [3:1.50]
-; SKYLAKE-NEXT: rolb %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: rorb %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rol_ror_8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: rolb %dil # sched: [2:1.00]
-; SKX-NEXT: rorb %dil # sched: [2:1.00]
-; SKX-NEXT: rolb (%rdx) # sched: [7:1.00]
-; SKX-NEXT: rorb (%rdx) # sched: [7:1.00]
-; SKX-NEXT: rolb $7, %dil # sched: [2:1.00]
-; SKX-NEXT: rorb $7, %dil # sched: [2:1.00]
-; SKX-NEXT: rolb $7, (%rdx) # sched: [7:1.00]
-; SKX-NEXT: rorb $7, (%rdx) # sched: [7:1.00]
-; SKX-NEXT: rolb %cl, %dil # sched: [3:1.50]
-; SKX-NEXT: rorb %cl, %dil # sched: [3:1.50]
-; SKX-NEXT: rolb %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: rorb %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_rol_ror_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: rolb %dil # sched: [1:0.50]
-; BDVER2-NEXT: rorb %dil # sched: [1:0.50]
-; BDVER2-NEXT: rolb (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rorb (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rolb $7, %dil # sched: [1:0.50]
-; BDVER2-NEXT: rorb $7, %dil # sched: [1:0.50]
-; BDVER2-NEXT: rolb $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rorb $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rolb %cl, %dil # sched: [1:0.50]
-; BDVER2-NEXT: rorb %cl, %dil # sched: [1:0.50]
-; BDVER2-NEXT: rolb %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rorb %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_rol_ror_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: rolb %dil # sched: [1:0.50]
-; BTVER2-NEXT: rorb %dil # sched: [1:0.50]
-; BTVER2-NEXT: rolb (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rorb (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rolb $7, %dil # sched: [1:0.50]
-; BTVER2-NEXT: rorb $7, %dil # sched: [1:0.50]
-; BTVER2-NEXT: rolb $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rorb $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rolb %cl, %dil # sched: [1:0.50]
-; BTVER2-NEXT: rorb %cl, %dil # sched: [1:0.50]
-; BTVER2-NEXT: rolb %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rorb %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_rol_ror_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: rolb %dil # sched: [1:0.25]
-; ZNVER1-NEXT: rorb %dil # sched: [1:0.25]
-; ZNVER1-NEXT: rolb (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rorb (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rolb $7, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: rorb $7, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: rolb $7, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rorb $7, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rolb %cl, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: rorb %cl, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: rolb %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rorb %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "rolb $0 \0A\09 rorb $0 \0A\09 rolb $2 \0A\09 rorb $2 \0A\09 rolb $3, $0 \0A\09 rorb $3, $0 \0A\09 rolb $3, $2 \0A\09 rorb $3, $2 \0A\09 rolb %CL, $0 \0A\09 rorb %CL, $0 \0A\09 rolb %CL, $2 \0A\09 rorb %CL, $2", "r,r,*m,i"(i8 %a0, i8 %a1, i8 *%a2, i8 7)
- ret void
-}
-define void @test_rol_ror_16(i16 %a0, i16 %a1, i16 *%a2) optsize {
-; GENERIC-LABEL: test_rol_ror_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: rolw %di # sched: [2:1.00]
-; GENERIC-NEXT: rorw %di # sched: [2:1.00]
-; GENERIC-NEXT: rolw (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: rorw (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: rolw $7, %di # sched: [2:1.00]
-; GENERIC-NEXT: rorw $7, %di # sched: [2:1.00]
-; GENERIC-NEXT: rolw $7, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: rorw $7, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: rolw %cl, %di # sched: [3:1.50]
-; GENERIC-NEXT: rorw %cl, %di # sched: [3:1.50]
-; GENERIC-NEXT: rolw %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: rorw %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_rol_ror_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: rolw %di # sched: [1:1.00]
-; ATOM-NEXT: rorw %di # sched: [1:1.00]
-; ATOM-NEXT: rolw (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rorw (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rolw $7, %di # sched: [1:1.00]
-; ATOM-NEXT: rorw $7, %di # sched: [1:1.00]
-; ATOM-NEXT: rolw $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rorw $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rolw %cl, %di # sched: [1:1.00]
-; ATOM-NEXT: rorw %cl, %di # sched: [1:1.00]
-; ATOM-NEXT: rolw %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rorw %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_rol_ror_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: rolw %di # sched: [1:1.00]
-; SLM-NEXT: rorw %di # sched: [1:1.00]
-; SLM-NEXT: rolw (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rorw (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rolw $7, %di # sched: [1:1.00]
-; SLM-NEXT: rorw $7, %di # sched: [1:1.00]
-; SLM-NEXT: rolw $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rorw $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rolw %cl, %di # sched: [1:1.00]
-; SLM-NEXT: rorw %cl, %di # sched: [1:1.00]
-; SLM-NEXT: rolw %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rorw %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_rol_ror_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: rolw %di # sched: [2:1.00]
-; SANDY-NEXT: rorw %di # sched: [2:1.00]
-; SANDY-NEXT: rolw (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: rorw (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: rolw $7, %di # sched: [2:1.00]
-; SANDY-NEXT: rorw $7, %di # sched: [2:1.00]
-; SANDY-NEXT: rolw $7, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: rorw $7, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: rolw %cl, %di # sched: [3:1.50]
-; SANDY-NEXT: rorw %cl, %di # sched: [3:1.50]
-; SANDY-NEXT: rolw %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: rorw %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rol_ror_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: rolw %di # sched: [2:1.00]
-; HASWELL-NEXT: rorw %di # sched: [2:1.00]
-; HASWELL-NEXT: rolw (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: rorw (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: rolw $7, %di # sched: [2:1.00]
-; HASWELL-NEXT: rorw $7, %di # sched: [2:1.00]
-; HASWELL-NEXT: rolw $7, (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: rorw $7, (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: rolw %cl, %di # sched: [3:1.00]
-; HASWELL-NEXT: rorw %cl, %di # sched: [3:1.00]
-; HASWELL-NEXT: rolw %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: rorw %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rol_ror_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: rolw %di # sched: [2:1.00]
-; BROADWELL-NEXT: rorw %di # sched: [2:1.00]
-; BROADWELL-NEXT: rolw (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: rorw (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: rolw $7, %di # sched: [2:1.00]
-; BROADWELL-NEXT: rorw $7, %di # sched: [2:1.00]
-; BROADWELL-NEXT: rolw $7, (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: rorw $7, (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: rolw %cl, %di # sched: [3:1.00]
-; BROADWELL-NEXT: rorw %cl, %di # sched: [3:1.00]
-; BROADWELL-NEXT: rolw %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: rorw %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rol_ror_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: rolw %di # sched: [2:1.00]
-; SKYLAKE-NEXT: rorw %di # sched: [2:1.00]
-; SKYLAKE-NEXT: rolw (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: rorw (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: rolw $7, %di # sched: [2:1.00]
-; SKYLAKE-NEXT: rorw $7, %di # sched: [2:1.00]
-; SKYLAKE-NEXT: rolw $7, (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: rorw $7, (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: rolw %cl, %di # sched: [3:1.50]
-; SKYLAKE-NEXT: rorw %cl, %di # sched: [3:1.50]
-; SKYLAKE-NEXT: rolw %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: rorw %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rol_ror_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: rolw %di # sched: [2:1.00]
-; SKX-NEXT: rorw %di # sched: [2:1.00]
-; SKX-NEXT: rolw (%rdx) # sched: [7:1.00]
-; SKX-NEXT: rorw (%rdx) # sched: [7:1.00]
-; SKX-NEXT: rolw $7, %di # sched: [2:1.00]
-; SKX-NEXT: rorw $7, %di # sched: [2:1.00]
-; SKX-NEXT: rolw $7, (%rdx) # sched: [7:1.00]
-; SKX-NEXT: rorw $7, (%rdx) # sched: [7:1.00]
-; SKX-NEXT: rolw %cl, %di # sched: [3:1.50]
-; SKX-NEXT: rorw %cl, %di # sched: [3:1.50]
-; SKX-NEXT: rolw %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: rorw %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_rol_ror_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: rolw %di # sched: [1:0.50]
-; BDVER2-NEXT: rorw %di # sched: [1:0.50]
-; BDVER2-NEXT: rolw (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rorw (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rolw $7, %di # sched: [1:0.50]
-; BDVER2-NEXT: rorw $7, %di # sched: [1:0.50]
-; BDVER2-NEXT: rolw $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rorw $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rolw %cl, %di # sched: [1:0.50]
-; BDVER2-NEXT: rorw %cl, %di # sched: [1:0.50]
-; BDVER2-NEXT: rolw %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rorw %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_rol_ror_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: rolw %di # sched: [1:0.50]
-; BTVER2-NEXT: rorw %di # sched: [1:0.50]
-; BTVER2-NEXT: rolw (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rorw (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rolw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: rorw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: rolw $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rorw $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rolw %cl, %di # sched: [1:0.50]
-; BTVER2-NEXT: rorw %cl, %di # sched: [1:0.50]
-; BTVER2-NEXT: rolw %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rorw %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_rol_ror_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: rolw %di # sched: [1:0.25]
-; ZNVER1-NEXT: rorw %di # sched: [1:0.25]
-; ZNVER1-NEXT: rolw (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rorw (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rolw $7, %di # sched: [1:0.25]
-; ZNVER1-NEXT: rorw $7, %di # sched: [1:0.25]
-; ZNVER1-NEXT: rolw $7, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rorw $7, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rolw %cl, %di # sched: [1:0.25]
-; ZNVER1-NEXT: rorw %cl, %di # sched: [1:0.25]
-; ZNVER1-NEXT: rolw %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rorw %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "rolw $0 \0A\09 rorw $0 \0A\09 rolw $2 \0A\09 rorw $2 \0A\09 rolw $3, $0 \0A\09 rorw $3, $0 \0A\09 rolw $3, $2 \0A\09 rorw $3, $2 \0A\09 rolw %CL, $0 \0A\09 rorw %CL, $0 \0A\09 rolw %CL, $2 \0A\09 rorw %CL, $2", "r,r,*m,i"(i16 %a0, i16 %a1, i16 *%a2, i8 7)
- ret void
-}
-define void @test_rol_ror_32(i32 %a0, i32 %a1, i32 *%a2) optsize {
-; GENERIC-LABEL: test_rol_ror_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: roll %edi # sched: [2:1.00]
-; GENERIC-NEXT: rorl %edi # sched: [2:1.00]
-; GENERIC-NEXT: roll (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: rorl (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: roll $7, %edi # sched: [2:1.00]
-; GENERIC-NEXT: rorl $7, %edi # sched: [2:1.00]
-; GENERIC-NEXT: roll $7, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: rorl $7, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: roll %cl, %edi # sched: [3:1.50]
-; GENERIC-NEXT: rorl %cl, %edi # sched: [3:1.50]
-; GENERIC-NEXT: roll %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: rorl %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_rol_ror_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: roll %edi # sched: [1:1.00]
-; ATOM-NEXT: rorl %edi # sched: [1:1.00]
-; ATOM-NEXT: roll (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rorl (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: roll $7, %edi # sched: [1:1.00]
-; ATOM-NEXT: rorl $7, %edi # sched: [1:1.00]
-; ATOM-NEXT: roll $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rorl $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: roll %cl, %edi # sched: [1:1.00]
-; ATOM-NEXT: rorl %cl, %edi # sched: [1:1.00]
-; ATOM-NEXT: roll %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rorl %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_rol_ror_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: roll %edi # sched: [1:1.00]
-; SLM-NEXT: rorl %edi # sched: [1:1.00]
-; SLM-NEXT: roll (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rorl (%rdx) # sched: [4:2.00]
-; SLM-NEXT: roll $7, %edi # sched: [1:1.00]
-; SLM-NEXT: rorl $7, %edi # sched: [1:1.00]
-; SLM-NEXT: roll $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rorl $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: roll %cl, %edi # sched: [1:1.00]
-; SLM-NEXT: rorl %cl, %edi # sched: [1:1.00]
-; SLM-NEXT: roll %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rorl %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_rol_ror_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: roll %edi # sched: [2:1.00]
-; SANDY-NEXT: rorl %edi # sched: [2:1.00]
-; SANDY-NEXT: roll (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: rorl (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: roll $7, %edi # sched: [2:1.00]
-; SANDY-NEXT: rorl $7, %edi # sched: [2:1.00]
-; SANDY-NEXT: roll $7, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: rorl $7, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: roll %cl, %edi # sched: [3:1.50]
-; SANDY-NEXT: rorl %cl, %edi # sched: [3:1.50]
-; SANDY-NEXT: roll %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: rorl %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rol_ror_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: roll %edi # sched: [2:1.00]
-; HASWELL-NEXT: rorl %edi # sched: [2:1.00]
-; HASWELL-NEXT: roll (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: rorl (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: roll $7, %edi # sched: [2:1.00]
-; HASWELL-NEXT: rorl $7, %edi # sched: [2:1.00]
-; HASWELL-NEXT: roll $7, (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: rorl $7, (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: roll %cl, %edi # sched: [3:1.00]
-; HASWELL-NEXT: rorl %cl, %edi # sched: [3:1.00]
-; HASWELL-NEXT: roll %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: rorl %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rol_ror_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: roll %edi # sched: [2:1.00]
-; BROADWELL-NEXT: rorl %edi # sched: [2:1.00]
-; BROADWELL-NEXT: roll (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: rorl (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: roll $7, %edi # sched: [2:1.00]
-; BROADWELL-NEXT: rorl $7, %edi # sched: [2:1.00]
-; BROADWELL-NEXT: roll $7, (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: rorl $7, (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: roll %cl, %edi # sched: [3:1.00]
-; BROADWELL-NEXT: rorl %cl, %edi # sched: [3:1.00]
-; BROADWELL-NEXT: roll %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: rorl %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rol_ror_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: roll %edi # sched: [2:1.00]
-; SKYLAKE-NEXT: rorl %edi # sched: [2:1.00]
-; SKYLAKE-NEXT: roll (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: rorl (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: roll $7, %edi # sched: [2:1.00]
-; SKYLAKE-NEXT: rorl $7, %edi # sched: [2:1.00]
-; SKYLAKE-NEXT: roll $7, (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: rorl $7, (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: roll %cl, %edi # sched: [3:1.50]
-; SKYLAKE-NEXT: rorl %cl, %edi # sched: [3:1.50]
-; SKYLAKE-NEXT: roll %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: rorl %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rol_ror_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: roll %edi # sched: [2:1.00]
-; SKX-NEXT: rorl %edi # sched: [2:1.00]
-; SKX-NEXT: roll (%rdx) # sched: [7:1.00]
-; SKX-NEXT: rorl (%rdx) # sched: [7:1.00]
-; SKX-NEXT: roll $7, %edi # sched: [2:1.00]
-; SKX-NEXT: rorl $7, %edi # sched: [2:1.00]
-; SKX-NEXT: roll $7, (%rdx) # sched: [7:1.00]
-; SKX-NEXT: rorl $7, (%rdx) # sched: [7:1.00]
-; SKX-NEXT: roll %cl, %edi # sched: [3:1.50]
-; SKX-NEXT: rorl %cl, %edi # sched: [3:1.50]
-; SKX-NEXT: roll %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: rorl %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_rol_ror_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: roll %edi # sched: [1:0.50]
-; BDVER2-NEXT: rorl %edi # sched: [1:0.50]
-; BDVER2-NEXT: roll (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rorl (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: roll $7, %edi # sched: [1:0.50]
-; BDVER2-NEXT: rorl $7, %edi # sched: [1:0.50]
-; BDVER2-NEXT: roll $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rorl $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: roll %cl, %edi # sched: [1:0.50]
-; BDVER2-NEXT: rorl %cl, %edi # sched: [1:0.50]
-; BDVER2-NEXT: roll %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rorl %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_rol_ror_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: roll %edi # sched: [1:0.50]
-; BTVER2-NEXT: rorl %edi # sched: [1:0.50]
-; BTVER2-NEXT: roll (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rorl (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: roll $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: rorl $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: roll $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rorl $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: roll %cl, %edi # sched: [1:0.50]
-; BTVER2-NEXT: rorl %cl, %edi # sched: [1:0.50]
-; BTVER2-NEXT: roll %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rorl %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_rol_ror_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: roll %edi # sched: [1:0.25]
-; ZNVER1-NEXT: rorl %edi # sched: [1:0.25]
-; ZNVER1-NEXT: roll (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rorl (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: roll $7, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: rorl $7, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: roll $7, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rorl $7, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: roll %cl, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: rorl %cl, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: roll %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rorl %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "roll $0 \0A\09 rorl $0 \0A\09 roll $2 \0A\09 rorl $2 \0A\09 roll $3, $0 \0A\09 rorl $3, $0 \0A\09 roll $3, $2 \0A\09 rorl $3, $2 \0A\09 roll %CL, $0 \0A\09 rorl %CL, $0 \0A\09 roll %CL, $2 \0A\09 rorl %CL, $2", "r,r,*m,i"(i32 %a0, i32 %a1, i32 *%a2, i8 7)
- ret void
-}
-define void @test_rol_ror_64(i64 %a0, i64 %a1, i64 *%a2) optsize {
-; GENERIC-LABEL: test_rol_ror_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: rolq %rdi # sched: [2:1.00]
-; GENERIC-NEXT: rorq %rdi # sched: [2:1.00]
-; GENERIC-NEXT: rolq (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: rorq (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: rolq $7, %rdi # sched: [2:1.00]
-; GENERIC-NEXT: rorq $7, %rdi # sched: [2:1.00]
-; GENERIC-NEXT: rolq $7, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: rorq $7, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: rolq %cl, %rdi # sched: [3:1.50]
-; GENERIC-NEXT: rorq %cl, %rdi # sched: [3:1.50]
-; GENERIC-NEXT: rolq %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: rorq %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_rol_ror_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: rolq %rdi # sched: [1:1.00]
-; ATOM-NEXT: rorq %rdi # sched: [1:1.00]
-; ATOM-NEXT: rolq (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rorq (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rolq $7, %rdi # sched: [1:1.00]
-; ATOM-NEXT: rorq $7, %rdi # sched: [1:1.00]
-; ATOM-NEXT: rolq $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rorq $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rolq %cl, %rdi # sched: [1:1.00]
-; ATOM-NEXT: rorq %cl, %rdi # sched: [1:1.00]
-; ATOM-NEXT: rolq %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: rorq %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_rol_ror_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: rolq %rdi # sched: [1:1.00]
-; SLM-NEXT: rorq %rdi # sched: [1:1.00]
-; SLM-NEXT: rolq (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rorq (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rolq $7, %rdi # sched: [1:1.00]
-; SLM-NEXT: rorq $7, %rdi # sched: [1:1.00]
-; SLM-NEXT: rolq $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rorq $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rolq %cl, %rdi # sched: [1:1.00]
-; SLM-NEXT: rorq %cl, %rdi # sched: [1:1.00]
-; SLM-NEXT: rolq %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: rorq %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_rol_ror_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: rolq %rdi # sched: [2:1.00]
-; SANDY-NEXT: rorq %rdi # sched: [2:1.00]
-; SANDY-NEXT: rolq (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: rorq (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: rolq $7, %rdi # sched: [2:1.00]
-; SANDY-NEXT: rorq $7, %rdi # sched: [2:1.00]
-; SANDY-NEXT: rolq $7, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: rorq $7, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: rolq %cl, %rdi # sched: [3:1.50]
-; SANDY-NEXT: rorq %cl, %rdi # sched: [3:1.50]
-; SANDY-NEXT: rolq %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: rorq %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_rol_ror_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: rolq %rdi # sched: [2:1.00]
-; HASWELL-NEXT: rorq %rdi # sched: [2:1.00]
-; HASWELL-NEXT: rolq (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: rorq (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: rolq $7, %rdi # sched: [2:1.00]
-; HASWELL-NEXT: rorq $7, %rdi # sched: [2:1.00]
-; HASWELL-NEXT: rolq $7, (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: rorq $7, (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: rolq %cl, %rdi # sched: [3:1.00]
-; HASWELL-NEXT: rorq %cl, %rdi # sched: [3:1.00]
-; HASWELL-NEXT: rolq %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: rorq %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rol_ror_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: rolq %rdi # sched: [2:1.00]
-; BROADWELL-NEXT: rorq %rdi # sched: [2:1.00]
-; BROADWELL-NEXT: rolq (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: rorq (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: rolq $7, %rdi # sched: [2:1.00]
-; BROADWELL-NEXT: rorq $7, %rdi # sched: [2:1.00]
-; BROADWELL-NEXT: rolq $7, (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: rorq $7, (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: rolq %cl, %rdi # sched: [3:1.00]
-; BROADWELL-NEXT: rorq %cl, %rdi # sched: [3:1.00]
-; BROADWELL-NEXT: rolq %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: rorq %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rol_ror_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: rolq %rdi # sched: [2:1.00]
-; SKYLAKE-NEXT: rorq %rdi # sched: [2:1.00]
-; SKYLAKE-NEXT: rolq (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: rorq (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: rolq $7, %rdi # sched: [2:1.00]
-; SKYLAKE-NEXT: rorq $7, %rdi # sched: [2:1.00]
-; SKYLAKE-NEXT: rolq $7, (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: rorq $7, (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: rolq %cl, %rdi # sched: [3:1.50]
-; SKYLAKE-NEXT: rorq %cl, %rdi # sched: [3:1.50]
-; SKYLAKE-NEXT: rolq %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: rorq %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rol_ror_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: rolq %rdi # sched: [2:1.00]
-; SKX-NEXT: rorq %rdi # sched: [2:1.00]
-; SKX-NEXT: rolq (%rdx) # sched: [7:1.00]
-; SKX-NEXT: rorq (%rdx) # sched: [7:1.00]
-; SKX-NEXT: rolq $7, %rdi # sched: [2:1.00]
-; SKX-NEXT: rorq $7, %rdi # sched: [2:1.00]
-; SKX-NEXT: rolq $7, (%rdx) # sched: [7:1.00]
-; SKX-NEXT: rorq $7, (%rdx) # sched: [7:1.00]
-; SKX-NEXT: rolq %cl, %rdi # sched: [3:1.50]
-; SKX-NEXT: rorq %cl, %rdi # sched: [3:1.50]
-; SKX-NEXT: rolq %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: rorq %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_rol_ror_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: rolq %rdi # sched: [1:0.50]
-; BDVER2-NEXT: rorq %rdi # sched: [1:0.50]
-; BDVER2-NEXT: rolq (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rorq (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rolq $7, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: rorq $7, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: rolq $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rorq $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rolq %cl, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: rorq %cl, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: rolq %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: rorq %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_rol_ror_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: rolq %rdi # sched: [1:0.50]
-; BTVER2-NEXT: rorq %rdi # sched: [1:0.50]
-; BTVER2-NEXT: rolq (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rorq (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rolq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: rorq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: rolq $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rorq $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rolq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: rorq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: rolq %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: rorq %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_rol_ror_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: rolq %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: rorq %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: rolq (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rorq (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rolq $7, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: rorq $7, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: rolq $7, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rorq $7, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rolq %cl, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: rorq %cl, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: rolq %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: rorq %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "rolq $0 \0A\09 rorq $0 \0A\09 rolq $2 \0A\09 rorq $2 \0A\09 rolq $3, $0 \0A\09 rorq $3, $0 \0A\09 rolq $3, $2 \0A\09 rorq $3, $2 \0A\09 rolq %CL, $0 \0A\09 rorq %CL, $0 \0A\09 rolq %CL, $2 \0A\09 rorq %CL, $2", "r,r,*m,i"(i64 %a0, i64 %a1, i64 *%a2, i8 7)
- ret void
-}
-
-define void @test_sar_shl_shr_8(i8 %a0, i8 %a1, i8 *%a2) optsize {
-; GENERIC-LABEL: test_sar_shl_shr_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: sarb %dil # sched: [1:0.50]
-; GENERIC-NEXT: shlb %dil # sched: [1:0.50]
-; GENERIC-NEXT: shrb %dil # sched: [1:0.50]
-; GENERIC-NEXT: sarb (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: shlb (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: shrb (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: sarb $7, %dil # sched: [1:0.50]
-; GENERIC-NEXT: shlb $7, %dil # sched: [1:0.50]
-; GENERIC-NEXT: shrb $7, %dil # sched: [1:0.50]
-; GENERIC-NEXT: sarb $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: shlb $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: shrb $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: sarb %cl, %dil # sched: [3:1.50]
-; GENERIC-NEXT: shlb %cl, %dil # sched: [3:1.50]
-; GENERIC-NEXT: shrb %cl, %dil # sched: [3:1.50]
-; GENERIC-NEXT: sarb %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: shlb %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: shrb %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sar_shl_shr_8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: sarb %dil # sched: [1:1.00]
-; ATOM-NEXT: shlb %dil # sched: [1:1.00]
-; ATOM-NEXT: shrb %dil # sched: [1:1.00]
-; ATOM-NEXT: sarb (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shlb (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shrb (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: sarb $7, %dil # sched: [1:1.00]
-; ATOM-NEXT: shlb $7, %dil # sched: [1:1.00]
-; ATOM-NEXT: shrb $7, %dil # sched: [1:1.00]
-; ATOM-NEXT: sarb $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shlb $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shrb $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: sarb %cl, %dil # sched: [1:1.00]
-; ATOM-NEXT: shlb %cl, %dil # sched: [1:1.00]
-; ATOM-NEXT: shrb %cl, %dil # sched: [1:1.00]
-; ATOM-NEXT: sarb %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shlb %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shrb %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sar_shl_shr_8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: sarb %dil # sched: [1:1.00]
-; SLM-NEXT: shlb %dil # sched: [1:1.00]
-; SLM-NEXT: shrb %dil # sched: [1:1.00]
-; SLM-NEXT: sarb (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shlb (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrb (%rdx) # sched: [4:2.00]
-; SLM-NEXT: sarb $7, %dil # sched: [1:1.00]
-; SLM-NEXT: shlb $7, %dil # sched: [1:1.00]
-; SLM-NEXT: shrb $7, %dil # sched: [1:1.00]
-; SLM-NEXT: sarb $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shlb $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrb $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: sarb %cl, %dil # sched: [1:1.00]
-; SLM-NEXT: shlb %cl, %dil # sched: [1:1.00]
-; SLM-NEXT: shrb %cl, %dil # sched: [1:1.00]
-; SLM-NEXT: sarb %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shlb %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrb %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_sar_shl_shr_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: sarb %dil # sched: [1:0.50]
-; SANDY-NEXT: shlb %dil # sched: [1:0.50]
-; SANDY-NEXT: shrb %dil # sched: [1:0.50]
-; SANDY-NEXT: sarb (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: shlb (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: shrb (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: sarb $7, %dil # sched: [1:0.50]
-; SANDY-NEXT: shlb $7, %dil # sched: [1:0.50]
-; SANDY-NEXT: shrb $7, %dil # sched: [1:0.50]
-; SANDY-NEXT: sarb $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: shlb $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: shrb $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: sarb %cl, %dil # sched: [3:1.50]
-; SANDY-NEXT: shlb %cl, %dil # sched: [3:1.50]
-; SANDY-NEXT: shrb %cl, %dil # sched: [3:1.50]
-; SANDY-NEXT: sarb %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: shlb %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: shrb %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_sar_shl_shr_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: sarb %dil # sched: [1:0.50]
-; HASWELL-NEXT: shlb %dil # sched: [1:0.50]
-; HASWELL-NEXT: shrb %dil # sched: [1:0.50]
-; HASWELL-NEXT: sarb (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: shlb (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: shrb (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: sarb $7, %dil # sched: [1:0.50]
-; HASWELL-NEXT: shlb $7, %dil # sched: [1:0.50]
-; HASWELL-NEXT: shrb $7, %dil # sched: [1:0.50]
-; HASWELL-NEXT: sarb $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: shlb $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: shrb $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: sarb %cl, %dil # sched: [3:1.00]
-; HASWELL-NEXT: shlb %cl, %dil # sched: [3:1.00]
-; HASWELL-NEXT: shrb %cl, %dil # sched: [3:1.00]
-; HASWELL-NEXT: sarb %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: shlb %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: shrb %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sar_shl_shr_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: sarb %dil # sched: [1:0.50]
-; BROADWELL-NEXT: shlb %dil # sched: [1:0.50]
-; BROADWELL-NEXT: shrb %dil # sched: [1:0.50]
-; BROADWELL-NEXT: sarb (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: shlb (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: shrb (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: sarb $7, %dil # sched: [1:0.50]
-; BROADWELL-NEXT: shlb $7, %dil # sched: [1:0.50]
-; BROADWELL-NEXT: shrb $7, %dil # sched: [1:0.50]
-; BROADWELL-NEXT: sarb $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: shlb $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: shrb $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: sarb %cl, %dil # sched: [3:1.00]
-; BROADWELL-NEXT: shlb %cl, %dil # sched: [3:1.00]
-; BROADWELL-NEXT: shrb %cl, %dil # sched: [3:1.00]
-; BROADWELL-NEXT: sarb %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: shlb %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: shrb %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sar_shl_shr_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: sarb %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: shlb %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: shrb %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: sarb (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: shlb (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: shrb (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: sarb $7, %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: shlb $7, %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: shrb $7, %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: sarb $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: shlb $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: shrb $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: sarb %cl, %dil # sched: [3:1.50]
-; SKYLAKE-NEXT: shlb %cl, %dil # sched: [3:1.50]
-; SKYLAKE-NEXT: shrb %cl, %dil # sched: [3:1.50]
-; SKYLAKE-NEXT: sarb %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: shlb %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: shrb %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sar_shl_shr_8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: sarb %dil # sched: [1:0.50]
-; SKX-NEXT: shlb %dil # sched: [1:0.50]
-; SKX-NEXT: shrb %dil # sched: [1:0.50]
-; SKX-NEXT: sarb (%rdx) # sched: [6:1.00]
-; SKX-NEXT: shlb (%rdx) # sched: [6:1.00]
-; SKX-NEXT: shrb (%rdx) # sched: [6:1.00]
-; SKX-NEXT: sarb $7, %dil # sched: [1:0.50]
-; SKX-NEXT: shlb $7, %dil # sched: [1:0.50]
-; SKX-NEXT: shrb $7, %dil # sched: [1:0.50]
-; SKX-NEXT: sarb $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: shlb $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: shrb $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: sarb %cl, %dil # sched: [3:1.50]
-; SKX-NEXT: shlb %cl, %dil # sched: [3:1.50]
-; SKX-NEXT: shrb %cl, %dil # sched: [3:1.50]
-; SKX-NEXT: sarb %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: shlb %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: shrb %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_sar_shl_shr_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: sarb %dil # sched: [1:0.50]
-; BDVER2-NEXT: shlb %dil # sched: [1:0.50]
-; BDVER2-NEXT: shrb %dil # sched: [1:0.50]
-; BDVER2-NEXT: sarb (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shlb (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shrb (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: sarb $7, %dil # sched: [1:0.50]
-; BDVER2-NEXT: shlb $7, %dil # sched: [1:0.50]
-; BDVER2-NEXT: shrb $7, %dil # sched: [1:0.50]
-; BDVER2-NEXT: sarb $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shlb $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shrb $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: sarb %cl, %dil # sched: [1:0.50]
-; BDVER2-NEXT: shlb %cl, %dil # sched: [1:0.50]
-; BDVER2-NEXT: shrb %cl, %dil # sched: [1:0.50]
-; BDVER2-NEXT: sarb %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shlb %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shrb %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_sar_shl_shr_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: sarb %dil # sched: [1:0.50]
-; BTVER2-NEXT: shlb %dil # sched: [1:0.50]
-; BTVER2-NEXT: shrb %dil # sched: [1:0.50]
-; BTVER2-NEXT: sarb (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shlb (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shrb (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: sarb $7, %dil # sched: [1:0.50]
-; BTVER2-NEXT: shlb $7, %dil # sched: [1:0.50]
-; BTVER2-NEXT: shrb $7, %dil # sched: [1:0.50]
-; BTVER2-NEXT: sarb $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shlb $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shrb $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: sarb %cl, %dil # sched: [1:0.50]
-; BTVER2-NEXT: shlb %cl, %dil # sched: [1:0.50]
-; BTVER2-NEXT: shrb %cl, %dil # sched: [1:0.50]
-; BTVER2-NEXT: sarb %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shlb %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shrb %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_sar_shl_shr_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: sarb %dil # sched: [1:0.25]
-; ZNVER1-NEXT: shlb %dil # sched: [1:0.25]
-; ZNVER1-NEXT: shrb %dil # sched: [1:0.25]
-; ZNVER1-NEXT: sarb (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shlb (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shrb (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: sarb $7, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: shlb $7, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: shrb $7, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: sarb $7, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shlb $7, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shrb $7, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: sarb %cl, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: shlb %cl, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: shrb %cl, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: sarb %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: shlb %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: shrb %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "sarb $0 \0A\09 shlb $0 \0A\09 shrb $0 \0A\09 sarb $2 \0A\09 shlb $2 \0A\09 shrb $2 \0A\09 sarb $3, $0 \0A\09 shlb $3, $0 \0A\09 shrb $3, $0 \0A\09 sarb $3, $2 \0A\09 shlb $3, $2 \0A\09 shrb $3, $2 \0A\09 sarb %CL, $0 \0A\09 shlb %CL, $0 \0A\09 shrb %CL, $0 \0A\09 sarb %CL, $2 \0A\09 shlb %CL, $2 \0A\09 shrb %CL, $2", "r,r,*m,i"(i8 %a0, i8 %a1, i8 *%a2, i8 7)
- ret void
-}
-define void @test_sar_shl_shr_16(i16 %a0, i16 %a1, i16 *%a2) optsize {
-; GENERIC-LABEL: test_sar_shl_shr_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: sarw %di # sched: [1:0.50]
-; GENERIC-NEXT: shlw %di # sched: [1:0.50]
-; GENERIC-NEXT: shrw %di # sched: [1:0.50]
-; GENERIC-NEXT: sarw (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: shlw (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: shrw (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: sarw $7, %di # sched: [1:0.50]
-; GENERIC-NEXT: shlw $7, %di # sched: [1:0.50]
-; GENERIC-NEXT: shrw $7, %di # sched: [1:0.50]
-; GENERIC-NEXT: sarw $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: shlw $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: shrw $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: sarw %cl, %di # sched: [3:1.50]
-; GENERIC-NEXT: shlw %cl, %di # sched: [3:1.50]
-; GENERIC-NEXT: shrw %cl, %di # sched: [3:1.50]
-; GENERIC-NEXT: sarw %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: shlw %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: shrw %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sar_shl_shr_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: sarw %di # sched: [1:1.00]
-; ATOM-NEXT: shlw %di # sched: [1:1.00]
-; ATOM-NEXT: shrw %di # sched: [1:1.00]
-; ATOM-NEXT: sarw (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shlw (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shrw (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: sarw $7, %di # sched: [1:1.00]
-; ATOM-NEXT: shlw $7, %di # sched: [1:1.00]
-; ATOM-NEXT: shrw $7, %di # sched: [1:1.00]
-; ATOM-NEXT: sarw $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shlw $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shrw $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: sarw %cl, %di # sched: [1:1.00]
-; ATOM-NEXT: shlw %cl, %di # sched: [1:1.00]
-; ATOM-NEXT: shrw %cl, %di # sched: [1:1.00]
-; ATOM-NEXT: sarw %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shlw %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shrw %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sar_shl_shr_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: sarw %di # sched: [1:1.00]
-; SLM-NEXT: shlw %di # sched: [1:1.00]
-; SLM-NEXT: shrw %di # sched: [1:1.00]
-; SLM-NEXT: sarw (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shlw (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrw (%rdx) # sched: [4:2.00]
-; SLM-NEXT: sarw $7, %di # sched: [1:1.00]
-; SLM-NEXT: shlw $7, %di # sched: [1:1.00]
-; SLM-NEXT: shrw $7, %di # sched: [1:1.00]
-; SLM-NEXT: sarw $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shlw $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrw $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: sarw %cl, %di # sched: [1:1.00]
-; SLM-NEXT: shlw %cl, %di # sched: [1:1.00]
-; SLM-NEXT: shrw %cl, %di # sched: [1:1.00]
-; SLM-NEXT: sarw %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shlw %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrw %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_sar_shl_shr_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: sarw %di # sched: [1:0.50]
-; SANDY-NEXT: shlw %di # sched: [1:0.50]
-; SANDY-NEXT: shrw %di # sched: [1:0.50]
-; SANDY-NEXT: sarw (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: shlw (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: shrw (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: sarw $7, %di # sched: [1:0.50]
-; SANDY-NEXT: shlw $7, %di # sched: [1:0.50]
-; SANDY-NEXT: shrw $7, %di # sched: [1:0.50]
-; SANDY-NEXT: sarw $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: shlw $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: shrw $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: sarw %cl, %di # sched: [3:1.50]
-; SANDY-NEXT: shlw %cl, %di # sched: [3:1.50]
-; SANDY-NEXT: shrw %cl, %di # sched: [3:1.50]
-; SANDY-NEXT: sarw %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: shlw %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: shrw %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_sar_shl_shr_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: sarw %di # sched: [1:0.50]
-; HASWELL-NEXT: shlw %di # sched: [1:0.50]
-; HASWELL-NEXT: shrw %di # sched: [1:0.50]
-; HASWELL-NEXT: sarw (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: shlw (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: shrw (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: sarw $7, %di # sched: [1:0.50]
-; HASWELL-NEXT: shlw $7, %di # sched: [1:0.50]
-; HASWELL-NEXT: shrw $7, %di # sched: [1:0.50]
-; HASWELL-NEXT: sarw $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: shlw $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: shrw $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: sarw %cl, %di # sched: [3:1.00]
-; HASWELL-NEXT: shlw %cl, %di # sched: [3:1.00]
-; HASWELL-NEXT: shrw %cl, %di # sched: [3:1.00]
-; HASWELL-NEXT: sarw %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: shlw %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: shrw %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sar_shl_shr_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: sarw %di # sched: [1:0.50]
-; BROADWELL-NEXT: shlw %di # sched: [1:0.50]
-; BROADWELL-NEXT: shrw %di # sched: [1:0.50]
-; BROADWELL-NEXT: sarw (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: shlw (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: shrw (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: sarw $7, %di # sched: [1:0.50]
-; BROADWELL-NEXT: shlw $7, %di # sched: [1:0.50]
-; BROADWELL-NEXT: shrw $7, %di # sched: [1:0.50]
-; BROADWELL-NEXT: sarw $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: shlw $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: shrw $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: sarw %cl, %di # sched: [3:1.00]
-; BROADWELL-NEXT: shlw %cl, %di # sched: [3:1.00]
-; BROADWELL-NEXT: shrw %cl, %di # sched: [3:1.00]
-; BROADWELL-NEXT: sarw %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: shlw %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: shrw %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sar_shl_shr_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: sarw %di # sched: [1:0.50]
-; SKYLAKE-NEXT: shlw %di # sched: [1:0.50]
-; SKYLAKE-NEXT: shrw %di # sched: [1:0.50]
-; SKYLAKE-NEXT: sarw (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: shlw (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: shrw (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: sarw $7, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: shlw $7, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: shrw $7, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: sarw $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: shlw $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: shrw $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: sarw %cl, %di # sched: [3:1.50]
-; SKYLAKE-NEXT: shlw %cl, %di # sched: [3:1.50]
-; SKYLAKE-NEXT: shrw %cl, %di # sched: [3:1.50]
-; SKYLAKE-NEXT: sarw %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: shlw %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: shrw %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sar_shl_shr_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: sarw %di # sched: [1:0.50]
-; SKX-NEXT: shlw %di # sched: [1:0.50]
-; SKX-NEXT: shrw %di # sched: [1:0.50]
-; SKX-NEXT: sarw (%rdx) # sched: [6:1.00]
-; SKX-NEXT: shlw (%rdx) # sched: [6:1.00]
-; SKX-NEXT: shrw (%rdx) # sched: [6:1.00]
-; SKX-NEXT: sarw $7, %di # sched: [1:0.50]
-; SKX-NEXT: shlw $7, %di # sched: [1:0.50]
-; SKX-NEXT: shrw $7, %di # sched: [1:0.50]
-; SKX-NEXT: sarw $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: shlw $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: shrw $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: sarw %cl, %di # sched: [3:1.50]
-; SKX-NEXT: shlw %cl, %di # sched: [3:1.50]
-; SKX-NEXT: shrw %cl, %di # sched: [3:1.50]
-; SKX-NEXT: sarw %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: shlw %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: shrw %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_sar_shl_shr_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: sarw %di # sched: [1:0.50]
-; BDVER2-NEXT: shlw %di # sched: [1:0.50]
-; BDVER2-NEXT: shrw %di # sched: [1:0.50]
-; BDVER2-NEXT: sarw (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shlw (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shrw (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: sarw $7, %di # sched: [1:0.50]
-; BDVER2-NEXT: shlw $7, %di # sched: [1:0.50]
-; BDVER2-NEXT: shrw $7, %di # sched: [1:0.50]
-; BDVER2-NEXT: sarw $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shlw $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shrw $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: sarw %cl, %di # sched: [1:0.50]
-; BDVER2-NEXT: shlw %cl, %di # sched: [1:0.50]
-; BDVER2-NEXT: shrw %cl, %di # sched: [1:0.50]
-; BDVER2-NEXT: sarw %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shlw %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shrw %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_sar_shl_shr_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: sarw %di # sched: [1:0.50]
-; BTVER2-NEXT: shlw %di # sched: [1:0.50]
-; BTVER2-NEXT: shrw %di # sched: [1:0.50]
-; BTVER2-NEXT: sarw (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shlw (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shrw (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: sarw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: shlw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: shrw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: sarw $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shlw $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shrw $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: sarw %cl, %di # sched: [1:0.50]
-; BTVER2-NEXT: shlw %cl, %di # sched: [1:0.50]
-; BTVER2-NEXT: shrw %cl, %di # sched: [1:0.50]
-; BTVER2-NEXT: sarw %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shlw %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shrw %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_sar_shl_shr_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: sarw %di # sched: [1:0.25]
-; ZNVER1-NEXT: shlw %di # sched: [1:0.25]
-; ZNVER1-NEXT: shrw %di # sched: [1:0.25]
-; ZNVER1-NEXT: sarw (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shlw (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shrw (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: sarw $7, %di # sched: [1:0.25]
-; ZNVER1-NEXT: shlw $7, %di # sched: [1:0.25]
-; ZNVER1-NEXT: shrw $7, %di # sched: [1:0.25]
-; ZNVER1-NEXT: sarw $7, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shlw $7, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shrw $7, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: sarw %cl, %di # sched: [1:0.25]
-; ZNVER1-NEXT: shlw %cl, %di # sched: [1:0.25]
-; ZNVER1-NEXT: shrw %cl, %di # sched: [1:0.25]
-; ZNVER1-NEXT: sarw %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: shlw %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: shrw %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "sarw $0 \0A\09 shlw $0 \0A\09 shrw $0 \0A\09 sarw $2 \0A\09 shlw $2 \0A\09 shrw $2 \0A\09 sarw $3, $0 \0A\09 shlw $3, $0 \0A\09 shrw $3, $0 \0A\09 sarw $3, $2 \0A\09 shlw $3, $2 \0A\09 shrw $3, $2 \0A\09 sarw %CL, $0 \0A\09 shlw %CL, $0 \0A\09 shrw %CL, $0 \0A\09 sarw %CL, $2 \0A\09 shlw %CL, $2 \0A\09 shrw %CL, $2", "r,r,*m,i"(i16 %a0, i16 %a1, i16 *%a2, i8 7)
- ret void
-}
-define void @test_sar_shl_shr_32(i32 %a0, i32 %a1, i32 *%a2) optsize {
-; GENERIC-LABEL: test_sar_shl_shr_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: sarl %edi # sched: [1:0.50]
-; GENERIC-NEXT: shll %edi # sched: [1:0.50]
-; GENERIC-NEXT: shrl %edi # sched: [1:0.50]
-; GENERIC-NEXT: sarl (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: shll (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: shrl (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: sarl $7, %edi # sched: [1:0.50]
-; GENERIC-NEXT: shll $7, %edi # sched: [1:0.50]
-; GENERIC-NEXT: shrl $7, %edi # sched: [1:0.50]
-; GENERIC-NEXT: sarl $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: shll $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: shrl $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: sarl %cl, %edi # sched: [3:1.50]
-; GENERIC-NEXT: shll %cl, %edi # sched: [3:1.50]
-; GENERIC-NEXT: shrl %cl, %edi # sched: [3:1.50]
-; GENERIC-NEXT: sarl %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: shll %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: shrl %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sar_shl_shr_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: sarl %edi # sched: [1:1.00]
-; ATOM-NEXT: shll %edi # sched: [1:1.00]
-; ATOM-NEXT: shrl %edi # sched: [1:1.00]
-; ATOM-NEXT: sarl (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shll (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shrl (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: sarl $7, %edi # sched: [1:1.00]
-; ATOM-NEXT: shll $7, %edi # sched: [1:1.00]
-; ATOM-NEXT: shrl $7, %edi # sched: [1:1.00]
-; ATOM-NEXT: sarl $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shll $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shrl $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: sarl %cl, %edi # sched: [1:1.00]
-; ATOM-NEXT: shll %cl, %edi # sched: [1:1.00]
-; ATOM-NEXT: shrl %cl, %edi # sched: [1:1.00]
-; ATOM-NEXT: sarl %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shll %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shrl %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sar_shl_shr_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: sarl %edi # sched: [1:1.00]
-; SLM-NEXT: shll %edi # sched: [1:1.00]
-; SLM-NEXT: shrl %edi # sched: [1:1.00]
-; SLM-NEXT: sarl (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shll (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrl (%rdx) # sched: [4:2.00]
-; SLM-NEXT: sarl $7, %edi # sched: [1:1.00]
-; SLM-NEXT: shll $7, %edi # sched: [1:1.00]
-; SLM-NEXT: shrl $7, %edi # sched: [1:1.00]
-; SLM-NEXT: sarl $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shll $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrl $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: sarl %cl, %edi # sched: [1:1.00]
-; SLM-NEXT: shll %cl, %edi # sched: [1:1.00]
-; SLM-NEXT: shrl %cl, %edi # sched: [1:1.00]
-; SLM-NEXT: sarl %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shll %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrl %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_sar_shl_shr_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: sarl %edi # sched: [1:0.50]
-; SANDY-NEXT: shll %edi # sched: [1:0.50]
-; SANDY-NEXT: shrl %edi # sched: [1:0.50]
-; SANDY-NEXT: sarl (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: shll (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: shrl (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: sarl $7, %edi # sched: [1:0.50]
-; SANDY-NEXT: shll $7, %edi # sched: [1:0.50]
-; SANDY-NEXT: shrl $7, %edi # sched: [1:0.50]
-; SANDY-NEXT: sarl $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: shll $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: shrl $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: sarl %cl, %edi # sched: [3:1.50]
-; SANDY-NEXT: shll %cl, %edi # sched: [3:1.50]
-; SANDY-NEXT: shrl %cl, %edi # sched: [3:1.50]
-; SANDY-NEXT: sarl %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: shll %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: shrl %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_sar_shl_shr_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: sarl %edi # sched: [1:0.50]
-; HASWELL-NEXT: shll %edi # sched: [1:0.50]
-; HASWELL-NEXT: shrl %edi # sched: [1:0.50]
-; HASWELL-NEXT: sarl (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: shll (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: shrl (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: sarl $7, %edi # sched: [1:0.50]
-; HASWELL-NEXT: shll $7, %edi # sched: [1:0.50]
-; HASWELL-NEXT: shrl $7, %edi # sched: [1:0.50]
-; HASWELL-NEXT: sarl $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: shll $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: shrl $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: sarl %cl, %edi # sched: [3:1.00]
-; HASWELL-NEXT: shll %cl, %edi # sched: [3:1.00]
-; HASWELL-NEXT: shrl %cl, %edi # sched: [3:1.00]
-; HASWELL-NEXT: sarl %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: shll %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: shrl %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sar_shl_shr_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: sarl %edi # sched: [1:0.50]
-; BROADWELL-NEXT: shll %edi # sched: [1:0.50]
-; BROADWELL-NEXT: shrl %edi # sched: [1:0.50]
-; BROADWELL-NEXT: sarl (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: shll (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: shrl (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: sarl $7, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: shll $7, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: shrl $7, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: sarl $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: shll $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: shrl $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: sarl %cl, %edi # sched: [3:1.00]
-; BROADWELL-NEXT: shll %cl, %edi # sched: [3:1.00]
-; BROADWELL-NEXT: shrl %cl, %edi # sched: [3:1.00]
-; BROADWELL-NEXT: sarl %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: shll %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: shrl %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sar_shl_shr_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: sarl %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: shll %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: shrl %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: sarl (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: shll (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: shrl (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: sarl $7, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: shll $7, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: shrl $7, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: sarl $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: shll $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: shrl $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: sarl %cl, %edi # sched: [3:1.50]
-; SKYLAKE-NEXT: shll %cl, %edi # sched: [3:1.50]
-; SKYLAKE-NEXT: shrl %cl, %edi # sched: [3:1.50]
-; SKYLAKE-NEXT: sarl %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: shll %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: shrl %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sar_shl_shr_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: sarl %edi # sched: [1:0.50]
-; SKX-NEXT: shll %edi # sched: [1:0.50]
-; SKX-NEXT: shrl %edi # sched: [1:0.50]
-; SKX-NEXT: sarl (%rdx) # sched: [6:1.00]
-; SKX-NEXT: shll (%rdx) # sched: [6:1.00]
-; SKX-NEXT: shrl (%rdx) # sched: [6:1.00]
-; SKX-NEXT: sarl $7, %edi # sched: [1:0.50]
-; SKX-NEXT: shll $7, %edi # sched: [1:0.50]
-; SKX-NEXT: shrl $7, %edi # sched: [1:0.50]
-; SKX-NEXT: sarl $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: shll $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: shrl $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: sarl %cl, %edi # sched: [3:1.50]
-; SKX-NEXT: shll %cl, %edi # sched: [3:1.50]
-; SKX-NEXT: shrl %cl, %edi # sched: [3:1.50]
-; SKX-NEXT: sarl %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: shll %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: shrl %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_sar_shl_shr_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: sarl %edi # sched: [1:0.50]
-; BDVER2-NEXT: shll %edi # sched: [1:0.50]
-; BDVER2-NEXT: shrl %edi # sched: [1:0.50]
-; BDVER2-NEXT: sarl (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shll (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shrl (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: sarl $7, %edi # sched: [1:0.50]
-; BDVER2-NEXT: shll $7, %edi # sched: [1:0.50]
-; BDVER2-NEXT: shrl $7, %edi # sched: [1:0.50]
-; BDVER2-NEXT: sarl $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shll $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shrl $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: sarl %cl, %edi # sched: [1:0.50]
-; BDVER2-NEXT: shll %cl, %edi # sched: [1:0.50]
-; BDVER2-NEXT: shrl %cl, %edi # sched: [1:0.50]
-; BDVER2-NEXT: sarl %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shll %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shrl %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_sar_shl_shr_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: sarl %edi # sched: [1:0.50]
-; BTVER2-NEXT: shll %edi # sched: [1:0.50]
-; BTVER2-NEXT: shrl %edi # sched: [1:0.50]
-; BTVER2-NEXT: sarl (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shll (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shrl (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: sarl $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: shll $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: shrl $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: sarl $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shll $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shrl $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: sarl %cl, %edi # sched: [1:0.50]
-; BTVER2-NEXT: shll %cl, %edi # sched: [1:0.50]
-; BTVER2-NEXT: shrl %cl, %edi # sched: [1:0.50]
-; BTVER2-NEXT: sarl %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shll %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shrl %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_sar_shl_shr_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: sarl %edi # sched: [1:0.25]
-; ZNVER1-NEXT: shll %edi # sched: [1:0.25]
-; ZNVER1-NEXT: shrl %edi # sched: [1:0.25]
-; ZNVER1-NEXT: sarl (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shll (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shrl (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: sarl $7, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: shll $7, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: shrl $7, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: sarl $7, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shll $7, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shrl $7, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: sarl %cl, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: shll %cl, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: shrl %cl, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: sarl %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: shll %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: shrl %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "sarl $0 \0A\09 shll $0 \0A\09 shrl $0 \0A\09 sarl $2 \0A\09 shll $2 \0A\09 shrl $2 \0A\09 sarl $3, $0 \0A\09 shll $3, $0 \0A\09 shrl $3, $0 \0A\09 sarl $3, $2 \0A\09 shll $3, $2 \0A\09 shrl $3, $2 \0A\09 sarl %CL, $0 \0A\09 shll %CL, $0 \0A\09 shrl %CL, $0 \0A\09 sarl %CL, $2 \0A\09 shll %CL, $2 \0A\09 shrl %CL, $2", "r,r,*m,i"(i32 %a0, i32 %a1, i32 *%a2, i8 7)
- ret void
-}
-define void @test_sar_shl_shr_64(i64 %a0, i64 %a1, i64 *%a2) optsize {
-; GENERIC-LABEL: test_sar_shl_shr_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: sarq %rdi # sched: [1:0.50]
-; GENERIC-NEXT: shlq %rdi # sched: [1:0.50]
-; GENERIC-NEXT: shrq %rdi # sched: [1:0.50]
-; GENERIC-NEXT: sarq (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: shlq (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: shrq (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: sarq $7, %rdi # sched: [1:0.50]
-; GENERIC-NEXT: shlq $7, %rdi # sched: [1:0.50]
-; GENERIC-NEXT: shrq $7, %rdi # sched: [1:0.50]
-; GENERIC-NEXT: sarq $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: shlq $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: shrq $7, (%rdx) # sched: [7:1.00]
-; GENERIC-NEXT: sarq %cl, %rdi # sched: [3:1.50]
-; GENERIC-NEXT: shlq %cl, %rdi # sched: [3:1.50]
-; GENERIC-NEXT: shrq %cl, %rdi # sched: [3:1.50]
-; GENERIC-NEXT: sarq %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: shlq %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: shrq %cl, (%rdx) # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sar_shl_shr_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: sarq %rdi # sched: [1:1.00]
-; ATOM-NEXT: shlq %rdi # sched: [1:1.00]
-; ATOM-NEXT: shrq %rdi # sched: [1:1.00]
-; ATOM-NEXT: sarq (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shlq (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shrq (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: sarq $7, %rdi # sched: [1:1.00]
-; ATOM-NEXT: shlq $7, %rdi # sched: [1:1.00]
-; ATOM-NEXT: shrq $7, %rdi # sched: [1:1.00]
-; ATOM-NEXT: sarq $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shlq $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shrq $7, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: sarq %cl, %rdi # sched: [1:1.00]
-; ATOM-NEXT: shlq %cl, %rdi # sched: [1:1.00]
-; ATOM-NEXT: shrq %cl, %rdi # sched: [1:1.00]
-; ATOM-NEXT: sarq %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shlq %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: shrq %cl, (%rdx) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sar_shl_shr_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: sarq %rdi # sched: [1:1.00]
-; SLM-NEXT: shlq %rdi # sched: [1:1.00]
-; SLM-NEXT: shrq %rdi # sched: [1:1.00]
-; SLM-NEXT: sarq (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shlq (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrq (%rdx) # sched: [4:2.00]
-; SLM-NEXT: sarq $7, %rdi # sched: [1:1.00]
-; SLM-NEXT: shlq $7, %rdi # sched: [1:1.00]
-; SLM-NEXT: shrq $7, %rdi # sched: [1:1.00]
-; SLM-NEXT: sarq $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shlq $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrq $7, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: sarq %cl, %rdi # sched: [1:1.00]
-; SLM-NEXT: shlq %cl, %rdi # sched: [1:1.00]
-; SLM-NEXT: shrq %cl, %rdi # sched: [1:1.00]
-; SLM-NEXT: sarq %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shlq %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrq %cl, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_sar_shl_shr_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: sarq %rdi # sched: [1:0.50]
-; SANDY-NEXT: shlq %rdi # sched: [1:0.50]
-; SANDY-NEXT: shrq %rdi # sched: [1:0.50]
-; SANDY-NEXT: sarq (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: shlq (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: shrq (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: sarq $7, %rdi # sched: [1:0.50]
-; SANDY-NEXT: shlq $7, %rdi # sched: [1:0.50]
-; SANDY-NEXT: shrq $7, %rdi # sched: [1:0.50]
-; SANDY-NEXT: sarq $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: shlq $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: shrq $7, (%rdx) # sched: [7:1.00]
-; SANDY-NEXT: sarq %cl, %rdi # sched: [3:1.50]
-; SANDY-NEXT: shlq %cl, %rdi # sched: [3:1.50]
-; SANDY-NEXT: shrq %cl, %rdi # sched: [3:1.50]
-; SANDY-NEXT: sarq %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: shlq %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: shrq %cl, (%rdx) # sched: [9:1.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_sar_shl_shr_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: sarq %rdi # sched: [1:0.50]
-; HASWELL-NEXT: shlq %rdi # sched: [1:0.50]
-; HASWELL-NEXT: shrq %rdi # sched: [1:0.50]
-; HASWELL-NEXT: sarq (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: shlq (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: shrq (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: sarq $7, %rdi # sched: [1:0.50]
-; HASWELL-NEXT: shlq $7, %rdi # sched: [1:0.50]
-; HASWELL-NEXT: shrq $7, %rdi # sched: [1:0.50]
-; HASWELL-NEXT: sarq $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: shlq $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: shrq $7, (%rdx) # sched: [7:1.00]
-; HASWELL-NEXT: sarq %cl, %rdi # sched: [3:1.00]
-; HASWELL-NEXT: shlq %cl, %rdi # sched: [3:1.00]
-; HASWELL-NEXT: shrq %cl, %rdi # sched: [3:1.00]
-; HASWELL-NEXT: sarq %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: shlq %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: shrq %cl, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sar_shl_shr_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: sarq %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: shlq %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: shrq %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: sarq (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: shlq (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: shrq (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: sarq $7, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: shlq $7, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: shrq $7, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: sarq $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: shlq $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: shrq $7, (%rdx) # sched: [6:1.00]
-; BROADWELL-NEXT: sarq %cl, %rdi # sched: [3:1.00]
-; BROADWELL-NEXT: shlq %cl, %rdi # sched: [3:1.00]
-; BROADWELL-NEXT: shrq %cl, %rdi # sched: [3:1.00]
-; BROADWELL-NEXT: sarq %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: shlq %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: shrq %cl, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sar_shl_shr_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: sarq %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: shlq %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: shrq %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: sarq (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: shlq (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: shrq (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: sarq $7, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: shlq $7, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: shrq $7, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: sarq $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: shlq $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: shrq $7, (%rdx) # sched: [6:1.00]
-; SKYLAKE-NEXT: sarq %cl, %rdi # sched: [3:1.50]
-; SKYLAKE-NEXT: shlq %cl, %rdi # sched: [3:1.50]
-; SKYLAKE-NEXT: shrq %cl, %rdi # sched: [3:1.50]
-; SKYLAKE-NEXT: sarq %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: shlq %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: shrq %cl, (%rdx) # sched: [8:1.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sar_shl_shr_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: sarq %rdi # sched: [1:0.50]
-; SKX-NEXT: shlq %rdi # sched: [1:0.50]
-; SKX-NEXT: shrq %rdi # sched: [1:0.50]
-; SKX-NEXT: sarq (%rdx) # sched: [6:1.00]
-; SKX-NEXT: shlq (%rdx) # sched: [6:1.00]
-; SKX-NEXT: shrq (%rdx) # sched: [6:1.00]
-; SKX-NEXT: sarq $7, %rdi # sched: [1:0.50]
-; SKX-NEXT: shlq $7, %rdi # sched: [1:0.50]
-; SKX-NEXT: shrq $7, %rdi # sched: [1:0.50]
-; SKX-NEXT: sarq $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: shlq $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: shrq $7, (%rdx) # sched: [6:1.00]
-; SKX-NEXT: sarq %cl, %rdi # sched: [3:1.50]
-; SKX-NEXT: shlq %cl, %rdi # sched: [3:1.50]
-; SKX-NEXT: shrq %cl, %rdi # sched: [3:1.50]
-; SKX-NEXT: sarq %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: shlq %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: shrq %cl, (%rdx) # sched: [8:1.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_sar_shl_shr_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: sarq %rdi # sched: [1:0.50]
-; BDVER2-NEXT: shlq %rdi # sched: [1:0.50]
-; BDVER2-NEXT: shrq %rdi # sched: [1:0.50]
-; BDVER2-NEXT: sarq (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shlq (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shrq (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: sarq $7, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: shlq $7, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: shrq $7, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: sarq $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shlq $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shrq $7, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: sarq %cl, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: sarq %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shlq %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: shrq %cl, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_sar_shl_shr_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: sarq %rdi # sched: [1:0.50]
-; BTVER2-NEXT: shlq %rdi # sched: [1:0.50]
-; BTVER2-NEXT: shrq %rdi # sched: [1:0.50]
-; BTVER2-NEXT: sarq (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shlq (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shrq (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: sarq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: shlq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: shrq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: sarq $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shlq $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shrq $7, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: sarq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: sarq %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shlq %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: shrq %cl, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_sar_shl_shr_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: sarq %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: shlq %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: shrq %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: sarq (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shlq (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shrq (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: sarq $7, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: shlq $7, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: shrq $7, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: sarq $7, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shlq $7, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shrq $7, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: sarq %cl, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: shlq %cl, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: shrq %cl, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: sarq %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: shlq %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: shrq %cl, (%rdx) # sched: [5:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "sarq $0 \0A\09 shlq $0 \0A\09 shrq $0 \0A\09 sarq $2 \0A\09 shlq $2 \0A\09 shrq $2 \0A\09 sarq $3, $0 \0A\09 shlq $3, $0 \0A\09 shrq $3, $0 \0A\09 sarq $3, $2 \0A\09 shlq $3, $2 \0A\09 shrq $3, $2 \0A\09 sarq %CL, $0 \0A\09 shlq %CL, $0 \0A\09 shrq %CL, $0 \0A\09 sarq %CL, $2 \0A\09 shlq %CL, $2 \0A\09 shrq %CL, $2", "r,r,*m,i"(i64 %a0, i64 %a1, i64 *%a2, i8 7)
- ret void
-}
-
-define void @test_sbb_8(i8 %a0, i8* %a1, i8 %a2) optsize {
-; GENERIC-LABEL: test_sbb_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: sbbb $7, %al # sched: [2:0.67]
-; GENERIC-NEXT: sbbb $7, %dil # sched: [2:0.67]
-; GENERIC-NEXT: sbbb $7, (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: sbbb %dl, %dil # sched: [2:0.67]
-; GENERIC-NEXT: sbbb %dil, (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: sbbb (%rsi), %dil # sched: [7:0.67]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sbb_8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: sbbb $7, %al # sched: [1:0.50]
-; ATOM-NEXT: sbbb $7, %dil # sched: [1:0.50]
-; ATOM-NEXT: sbbb $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: sbbb %dl, %dil # sched: [1:0.50]
-; ATOM-NEXT: sbbb %dil, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: sbbb (%rsi), %dil # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sbb_8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: sbbb $7, %al # sched: [1:0.50]
-; SLM-NEXT: sbbb $7, %dil # sched: [1:0.50]
-; SLM-NEXT: sbbb $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: sbbb %dl, %dil # sched: [1:0.50]
-; SLM-NEXT: sbbb %dil, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: sbbb (%rsi), %dil # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_sbb_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: sbbb $7, %al # sched: [2:0.67]
-; SANDY-NEXT: sbbb $7, %dil # sched: [2:0.67]
-; SANDY-NEXT: sbbb $7, (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: sbbb %dl, %dil # sched: [2:0.67]
-; SANDY-NEXT: sbbb %dil, (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: sbbb (%rsi), %dil # sched: [7:0.67]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_sbb_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: sbbb $7, %al # sched: [2:0.50]
-; HASWELL-NEXT: sbbb $7, %dil # sched: [2:0.50]
-; HASWELL-NEXT: sbbb $7, (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: sbbb %dl, %dil # sched: [2:0.50]
-; HASWELL-NEXT: sbbb %dil, (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: sbbb (%rsi), %dil # sched: [7:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sbb_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: sbbb $7, %al # sched: [2:0.50]
-; BROADWELL-NEXT: sbbb $7, %dil # sched: [2:0.50]
-; BROADWELL-NEXT: sbbb $7, (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: sbbb %dl, %dil # sched: [1:0.50]
-; BROADWELL-NEXT: sbbb %dil, (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: sbbb (%rsi), %dil # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sbb_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: sbbb $7, %al # sched: [2:0.50]
-; SKYLAKE-NEXT: sbbb $7, %dil # sched: [2:0.50]
-; SKYLAKE-NEXT: sbbb $7, (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: sbbb %dl, %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: sbbb %dil, (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: sbbb (%rsi), %dil # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sbb_8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: sbbb $7, %al # sched: [2:0.50]
-; SKX-NEXT: sbbb $7, %dil # sched: [2:0.50]
-; SKX-NEXT: sbbb $7, (%rsi) # sched: [8:1.00]
-; SKX-NEXT: sbbb %dl, %dil # sched: [1:0.50]
-; SKX-NEXT: sbbb %dil, (%rsi) # sched: [8:1.00]
-; SKX-NEXT: sbbb (%rsi), %dil # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_sbb_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: sbbb $7, %al # sched: [1:1.00]
-; BDVER2-NEXT: sbbb $7, %dil # sched: [1:1.00]
-; BDVER2-NEXT: sbbb $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: sbbb %dl, %dil # sched: [1:1.00]
-; BDVER2-NEXT: sbbb %dil, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: sbbb (%rsi), %dil # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_sbb_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: sbbb $7, %al # sched: [1:1.00]
-; BTVER2-NEXT: sbbb $7, %dil # sched: [1:1.00]
-; BTVER2-NEXT: sbbb $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: sbbb %dl, %dil # sched: [1:1.00]
-; BTVER2-NEXT: sbbb %dil, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: sbbb (%rsi), %dil # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_sbb_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: sbbb $7, %al # sched: [1:0.25]
-; ZNVER1-NEXT: sbbb $7, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: sbbb $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: sbbb %dl, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: sbbb %dil, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: sbbb (%rsi), %dil # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "sbbb $3, %AL \0A\09 sbbb $3, $0 \0A\09 sbbb $3, $2 \0A\09 sbbb $1, $0 \0A\09 sbbb $0, $2 \0A\09 sbbb $2, $0", "r,r,*m,i"(i8 %a0, i8 %a2, i8* %a1, i8 7) nounwind
- ret void
-}
-define void @test_sbb_16(i16 %a0, i16* %a1, i16 %a2) optsize {
-; GENERIC-LABEL: test_sbb_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: sbbw $511, %ax # imm = 0x1FF
-; GENERIC-NEXT: # sched: [2:0.67]
-; GENERIC-NEXT: sbbw $511, %di # imm = 0x1FF
-; GENERIC-NEXT: # sched: [2:0.67]
-; GENERIC-NEXT: sbbw $511, (%rsi) # imm = 0x1FF
-; GENERIC-NEXT: # sched: [9:1.00]
-; GENERIC-NEXT: sbbw $7, %di # sched: [2:0.67]
-; GENERIC-NEXT: sbbw $7, (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: sbbw %dx, %di # sched: [2:0.67]
-; GENERIC-NEXT: sbbw %di, (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: sbbw (%rsi), %di # sched: [7:0.67]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sbb_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: sbbw $511, %ax # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: sbbw $511, %di # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: sbbw $511, (%rsi) # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: sbbw $7, %di # sched: [1:0.50]
-; ATOM-NEXT: sbbw $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: sbbw %dx, %di # sched: [1:0.50]
-; ATOM-NEXT: sbbw %di, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: sbbw (%rsi), %di # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sbb_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: sbbw $511, %ax # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: sbbw $511, %di # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: sbbw $511, (%rsi) # imm = 0x1FF
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: sbbw $7, %di # sched: [1:0.50]
-; SLM-NEXT: sbbw $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: sbbw %dx, %di # sched: [1:0.50]
-; SLM-NEXT: sbbw %di, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: sbbw (%rsi), %di # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_sbb_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: sbbw $511, %ax # imm = 0x1FF
-; SANDY-NEXT: # sched: [2:0.67]
-; SANDY-NEXT: sbbw $511, %di # imm = 0x1FF
-; SANDY-NEXT: # sched: [2:0.67]
-; SANDY-NEXT: sbbw $511, (%rsi) # imm = 0x1FF
-; SANDY-NEXT: # sched: [9:1.00]
-; SANDY-NEXT: sbbw $7, %di # sched: [2:0.67]
-; SANDY-NEXT: sbbw $7, (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: sbbw %dx, %di # sched: [2:0.67]
-; SANDY-NEXT: sbbw %di, (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: sbbw (%rsi), %di # sched: [7:0.67]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_sbb_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: sbbw $511, %ax # imm = 0x1FF
-; HASWELL-NEXT: # sched: [2:0.50]
-; HASWELL-NEXT: sbbw $511, %di # imm = 0x1FF
-; HASWELL-NEXT: # sched: [2:0.50]
-; HASWELL-NEXT: sbbw $511, (%rsi) # imm = 0x1FF
-; HASWELL-NEXT: # sched: [9:1.00]
-; HASWELL-NEXT: sbbw $7, %di # sched: [2:0.50]
-; HASWELL-NEXT: sbbw $7, (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: sbbw %dx, %di # sched: [2:0.50]
-; HASWELL-NEXT: sbbw %di, (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: sbbw (%rsi), %di # sched: [7:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sbb_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: sbbw $511, %ax # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.50]
-; BROADWELL-NEXT: sbbw $511, %di # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.50]
-; BROADWELL-NEXT: sbbw $511, (%rsi) # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [8:1.00]
-; BROADWELL-NEXT: sbbw $7, %di # sched: [1:0.50]
-; BROADWELL-NEXT: sbbw $7, (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: sbbw %dx, %di # sched: [1:0.50]
-; BROADWELL-NEXT: sbbw %di, (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: sbbw (%rsi), %di # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sbb_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: sbbw $511, %ax # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.50]
-; SKYLAKE-NEXT: sbbw $511, %di # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.50]
-; SKYLAKE-NEXT: sbbw $511, (%rsi) # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [8:1.00]
-; SKYLAKE-NEXT: sbbw $7, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: sbbw $7, (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: sbbw %dx, %di # sched: [1:0.50]
-; SKYLAKE-NEXT: sbbw %di, (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: sbbw (%rsi), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sbb_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: sbbw $511, %ax # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.50]
-; SKX-NEXT: sbbw $511, %di # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.50]
-; SKX-NEXT: sbbw $511, (%rsi) # imm = 0x1FF
-; SKX-NEXT: # sched: [8:1.00]
-; SKX-NEXT: sbbw $7, %di # sched: [1:0.50]
-; SKX-NEXT: sbbw $7, (%rsi) # sched: [8:1.00]
-; SKX-NEXT: sbbw %dx, %di # sched: [1:0.50]
-; SKX-NEXT: sbbw %di, (%rsi) # sched: [8:1.00]
-; SKX-NEXT: sbbw (%rsi), %di # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_sbb_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: sbbw $511, %ax # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: sbbw $511, %di # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: sbbw $511, (%rsi) # imm = 0x1FF
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: sbbw $7, %di # sched: [1:1.00]
-; BDVER2-NEXT: sbbw $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: sbbw %dx, %di # sched: [1:1.00]
-; BDVER2-NEXT: sbbw %di, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: sbbw (%rsi), %di # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_sbb_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: sbbw $511, %ax # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: sbbw $511, %di # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: sbbw $511, (%rsi) # imm = 0x1FF
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: sbbw $7, %di # sched: [1:1.00]
-; BTVER2-NEXT: sbbw $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: sbbw %dx, %di # sched: [1:1.00]
-; BTVER2-NEXT: sbbw %di, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: sbbw (%rsi), %di # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_sbb_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: sbbw $511, %ax # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: sbbw $511, %di # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: sbbw $511, (%rsi) # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: sbbw $7, %di # sched: [1:0.25]
-; ZNVER1-NEXT: sbbw $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: sbbw %dx, %di # sched: [1:0.25]
-; ZNVER1-NEXT: sbbw %di, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: sbbw (%rsi), %di # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "sbbw $3, %AX \0A\09 sbbw $3, $0 \0A\09 sbbw $3, $2 \0A\09 sbbw $4, $0 \0A\09 sbbw $4, $2 \0A\09 sbbw $1, $0 \0A\09 sbbw $0, $2 \0A\09 sbbw $2, $0", "r,r,*m,i,i"(i16 %a0, i16 %a2, i16* %a1, i16 511, i8 7) nounwind
- ret void
-}
-define void @test_sbb_32(i32 %a0, i32* %a1, i32 %a2) optsize {
-; GENERIC-LABEL: test_sbb_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: sbbl $665536, %eax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [2:0.67]
-; GENERIC-NEXT: sbbl $665536, %edi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [2:0.67]
-; GENERIC-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [9:1.00]
-; GENERIC-NEXT: sbbl $7, %edi # sched: [2:0.67]
-; GENERIC-NEXT: sbbl $7, (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: sbbl %edx, %edi # sched: [2:0.67]
-; GENERIC-NEXT: sbbl %edi, (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: sbbl (%rsi), %edi # sched: [7:0.67]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sbb_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: sbbl $665536, %eax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: sbbl $665536, %edi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: sbbl $7, %edi # sched: [1:0.50]
-; ATOM-NEXT: sbbl $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: sbbl %edx, %edi # sched: [1:0.50]
-; ATOM-NEXT: sbbl %edi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: sbbl (%rsi), %edi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sbb_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: sbbl $665536, %eax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: sbbl $665536, %edi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: sbbl $7, %edi # sched: [1:0.50]
-; SLM-NEXT: sbbl $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: sbbl %edx, %edi # sched: [1:0.50]
-; SLM-NEXT: sbbl %edi, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: sbbl (%rsi), %edi # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_sbb_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: sbbl $665536, %eax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [2:0.67]
-; SANDY-NEXT: sbbl $665536, %edi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [2:0.67]
-; SANDY-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [9:1.00]
-; SANDY-NEXT: sbbl $7, %edi # sched: [2:0.67]
-; SANDY-NEXT: sbbl $7, (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: sbbl %edx, %edi # sched: [2:0.67]
-; SANDY-NEXT: sbbl %edi, (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: sbbl (%rsi), %edi # sched: [7:0.67]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_sbb_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: sbbl $665536, %eax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [2:0.50]
-; HASWELL-NEXT: sbbl $665536, %edi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [2:0.50]
-; HASWELL-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [9:1.00]
-; HASWELL-NEXT: sbbl $7, %edi # sched: [2:0.50]
-; HASWELL-NEXT: sbbl $7, (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: sbbl %edx, %edi # sched: [2:0.50]
-; HASWELL-NEXT: sbbl %edi, (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: sbbl (%rsi), %edi # sched: [7:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sbb_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: sbbl $665536, %eax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.50]
-; BROADWELL-NEXT: sbbl $665536, %edi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.50]
-; BROADWELL-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [8:1.00]
-; BROADWELL-NEXT: sbbl $7, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: sbbl $7, (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: sbbl %edx, %edi # sched: [1:0.50]
-; BROADWELL-NEXT: sbbl %edi, (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: sbbl (%rsi), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sbb_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: sbbl $665536, %eax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.50]
-; SKYLAKE-NEXT: sbbl $665536, %edi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.50]
-; SKYLAKE-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [8:1.00]
-; SKYLAKE-NEXT: sbbl $7, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: sbbl $7, (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: sbbl %edx, %edi # sched: [1:0.50]
-; SKYLAKE-NEXT: sbbl %edi, (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: sbbl (%rsi), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sbb_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: sbbl $665536, %eax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.50]
-; SKX-NEXT: sbbl $665536, %edi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.50]
-; SKX-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [8:1.00]
-; SKX-NEXT: sbbl $7, %edi # sched: [1:0.50]
-; SKX-NEXT: sbbl $7, (%rsi) # sched: [8:1.00]
-; SKX-NEXT: sbbl %edx, %edi # sched: [1:0.50]
-; SKX-NEXT: sbbl %edi, (%rsi) # sched: [8:1.00]
-; SKX-NEXT: sbbl (%rsi), %edi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_sbb_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: sbbl $665536, %eax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: sbbl $665536, %edi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: sbbl $7, %edi # sched: [1:1.00]
-; BDVER2-NEXT: sbbl $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: sbbl %edx, %edi # sched: [1:1.00]
-; BDVER2-NEXT: sbbl %edi, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: sbbl (%rsi), %edi # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_sbb_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: sbbl $665536, %eax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: sbbl $665536, %edi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: sbbl $7, %edi # sched: [1:1.00]
-; BTVER2-NEXT: sbbl $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: sbbl %edx, %edi # sched: [1:1.00]
-; BTVER2-NEXT: sbbl %edi, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: sbbl (%rsi), %edi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_sbb_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: sbbl $665536, %eax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: sbbl $665536, %edi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: sbbl $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: sbbl $7, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: sbbl $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: sbbl %edx, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: sbbl %edi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: sbbl (%rsi), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "sbbl $3, %EAX \0A\09 sbbl $3, $0 \0A\09 sbbl $3, $2 \0A\09 sbbl $4, $0 \0A\09 sbbl $4, $2 \0A\09 sbbl $1, $0 \0A\09 sbbl $0, $2 \0A\09 sbbl $2, $0", "r,r,*m,i,i"(i32 %a0, i32 %a2, i32* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-define void @test_sbb_64(i64 %a0, i64* %a1, i64 %a2) optsize {
-; GENERIC-LABEL: test_sbb_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: sbbq $665536, %rax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [2:0.67]
-; GENERIC-NEXT: sbbq $665536, %rdi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [2:0.67]
-; GENERIC-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [9:1.00]
-; GENERIC-NEXT: sbbq $7, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: sbbq $7, (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: sbbq %rdx, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: sbbq %rdi, (%rsi) # sched: [9:1.00]
-; GENERIC-NEXT: sbbq (%rsi), %rdi # sched: [7:0.67]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sbb_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: sbbq $665536, %rax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: sbbq $665536, %rdi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: sbbq $7, %rdi # sched: [1:0.50]
-; ATOM-NEXT: sbbq $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: sbbq %rdx, %rdi # sched: [1:0.50]
-; ATOM-NEXT: sbbq %rdi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: sbbq (%rsi), %rdi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sbb_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: sbbq $665536, %rax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: sbbq $665536, %rdi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: sbbq $7, %rdi # sched: [1:0.50]
-; SLM-NEXT: sbbq $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: sbbq %rdx, %rdi # sched: [1:0.50]
-; SLM-NEXT: sbbq %rdi, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: sbbq (%rsi), %rdi # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_sbb_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: sbbq $665536, %rax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [2:0.67]
-; SANDY-NEXT: sbbq $665536, %rdi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [2:0.67]
-; SANDY-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [9:1.00]
-; SANDY-NEXT: sbbq $7, %rdi # sched: [2:0.67]
-; SANDY-NEXT: sbbq $7, (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: sbbq %rdx, %rdi # sched: [2:0.67]
-; SANDY-NEXT: sbbq %rdi, (%rsi) # sched: [9:1.00]
-; SANDY-NEXT: sbbq (%rsi), %rdi # sched: [7:0.67]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_sbb_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: sbbq $665536, %rax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [2:0.50]
-; HASWELL-NEXT: sbbq $665536, %rdi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [2:0.50]
-; HASWELL-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [9:1.00]
-; HASWELL-NEXT: sbbq $7, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: sbbq $7, (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: sbbq %rdx, %rdi # sched: [2:0.50]
-; HASWELL-NEXT: sbbq %rdi, (%rsi) # sched: [9:1.00]
-; HASWELL-NEXT: sbbq (%rsi), %rdi # sched: [7:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sbb_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: sbbq $665536, %rax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.50]
-; BROADWELL-NEXT: sbbq $665536, %rdi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.50]
-; BROADWELL-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [8:1.00]
-; BROADWELL-NEXT: sbbq $7, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: sbbq $7, (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: sbbq %rdx, %rdi # sched: [1:0.50]
-; BROADWELL-NEXT: sbbq %rdi, (%rsi) # sched: [8:1.00]
-; BROADWELL-NEXT: sbbq (%rsi), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sbb_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: sbbq $665536, %rax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.50]
-; SKYLAKE-NEXT: sbbq $665536, %rdi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.50]
-; SKYLAKE-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [8:1.00]
-; SKYLAKE-NEXT: sbbq $7, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: sbbq $7, (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: sbbq %rdx, %rdi # sched: [1:0.50]
-; SKYLAKE-NEXT: sbbq %rdi, (%rsi) # sched: [8:1.00]
-; SKYLAKE-NEXT: sbbq (%rsi), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sbb_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: sbbq $665536, %rax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.50]
-; SKX-NEXT: sbbq $665536, %rdi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.50]
-; SKX-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [8:1.00]
-; SKX-NEXT: sbbq $7, %rdi # sched: [1:0.50]
-; SKX-NEXT: sbbq $7, (%rsi) # sched: [8:1.00]
-; SKX-NEXT: sbbq %rdx, %rdi # sched: [1:0.50]
-; SKX-NEXT: sbbq %rdi, (%rsi) # sched: [8:1.00]
-; SKX-NEXT: sbbq (%rsi), %rdi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_sbb_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: sbbq $665536, %rax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: sbbq $665536, %rdi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:1.00]
-; BDVER2-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: sbbq $7, %rdi # sched: [1:1.00]
-; BDVER2-NEXT: sbbq $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: sbbq %rdx, %rdi # sched: [1:1.00]
-; BDVER2-NEXT: sbbq %rdi, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: sbbq (%rsi), %rdi # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_sbb_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: sbbq $665536, %rax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: sbbq $665536, %rdi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:1.00]
-; BTVER2-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: sbbq $7, %rdi # sched: [1:1.00]
-; BTVER2-NEXT: sbbq $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: sbbq %rdx, %rdi # sched: [1:1.00]
-; BTVER2-NEXT: sbbq %rdi, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: sbbq (%rsi), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_sbb_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: sbbq $665536, %rax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: sbbq $665536, %rdi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: sbbq $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: sbbq $7, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: sbbq $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: sbbq %rdx, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: sbbq %rdi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: sbbq (%rsi), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "sbbq $3, %RAX \0A\09 sbbq $3, $0 \0A\09 sbbq $3, $2 \0A\09 sbbq $4, $0 \0A\09 sbbq $4, $2 \0A\09 sbbq $1, $0 \0A\09 sbbq $0, $2 \0A\09 sbbq $2, $0", "r,r,*m,i,i"(i64 %a0, i64 %a2, i64* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-
-define void @test_scas() optsize {
-; GENERIC-LABEL: test_scas:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: scasb %es:(%rdi), %al # sched: [2:0.67]
-; GENERIC-NEXT: scasw %es:(%rdi), %ax # sched: [2:0.67]
-; GENERIC-NEXT: scasl %es:(%rdi), %eax # sched: [2:0.67]
-; GENERIC-NEXT: scasq %es:(%rdi), %rax # sched: [2:0.67]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_scas:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: scasb %es:(%rdi), %al # sched: [2:1.00]
-; ATOM-NEXT: scasw %es:(%rdi), %ax # sched: [2:1.00]
-; ATOM-NEXT: scasl %es:(%rdi), %eax # sched: [2:1.00]
-; ATOM-NEXT: scasq %es:(%rdi), %rax # sched: [2:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_scas:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: scasb %es:(%rdi), %al # sched: [100:1.00]
-; SLM-NEXT: scasw %es:(%rdi), %ax # sched: [100:1.00]
-; SLM-NEXT: scasl %es:(%rdi), %eax # sched: [100:1.00]
-; SLM-NEXT: scasq %es:(%rdi), %rax # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_scas:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: scasb %es:(%rdi), %al # sched: [2:0.67]
-; SANDY-NEXT: scasw %es:(%rdi), %ax # sched: [2:0.67]
-; SANDY-NEXT: scasl %es:(%rdi), %eax # sched: [2:0.67]
-; SANDY-NEXT: scasq %es:(%rdi), %rax # sched: [2:0.67]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_scas:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: scasb %es:(%rdi), %al # sched: [7:0.50]
-; HASWELL-NEXT: scasw %es:(%rdi), %ax # sched: [7:0.50]
-; HASWELL-NEXT: scasl %es:(%rdi), %eax # sched: [7:0.50]
-; HASWELL-NEXT: scasq %es:(%rdi), %rax # sched: [7:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_scas:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: scasb %es:(%rdi), %al # sched: [7:0.50]
-; BROADWELL-NEXT: scasw %es:(%rdi), %ax # sched: [7:0.50]
-; BROADWELL-NEXT: scasl %es:(%rdi), %eax # sched: [7:0.50]
-; BROADWELL-NEXT: scasq %es:(%rdi), %rax # sched: [7:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_scas:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: scasb %es:(%rdi), %al # sched: [7:0.50]
-; SKYLAKE-NEXT: scasw %es:(%rdi), %ax # sched: [7:0.50]
-; SKYLAKE-NEXT: scasl %es:(%rdi), %eax # sched: [7:0.50]
-; SKYLAKE-NEXT: scasq %es:(%rdi), %rax # sched: [7:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_scas:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: scasb %es:(%rdi), %al # sched: [7:0.50]
-; SKX-NEXT: scasw %es:(%rdi), %ax # sched: [7:0.50]
-; SKX-NEXT: scasl %es:(%rdi), %eax # sched: [7:0.50]
-; SKX-NEXT: scasq %es:(%rdi), %rax # sched: [7:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_scas:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: scasb %es:(%rdi), %al # sched: [100:0.50]
-; BDVER2-NEXT: scasw %es:(%rdi), %ax # sched: [100:0.50]
-; BDVER2-NEXT: scasl %es:(%rdi), %eax # sched: [100:0.50]
-; BDVER2-NEXT: scasq %es:(%rdi), %rax # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_scas:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: scasb %es:(%rdi), %al # sched: [100:0.50]
-; BTVER2-NEXT: scasw %es:(%rdi), %ax # sched: [100:0.50]
-; BTVER2-NEXT: scasl %es:(%rdi), %eax # sched: [100:0.50]
-; BTVER2-NEXT: scasq %es:(%rdi), %rax # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_scas:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: scasb %es:(%rdi), %al # sched: [100:0.25]
-; ZNVER1-NEXT: scasw %es:(%rdi), %ax # sched: [100:0.25]
-; ZNVER1-NEXT: scasl %es:(%rdi), %eax # sched: [100:0.25]
-; ZNVER1-NEXT: scasq %es:(%rdi), %rax # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "scasb \0A\09 scasw \0A\09 scasl \0A\09 scasq", ""()
- ret void
-}
-
-define void @test_setcc(i8 %a0, i8 *%a1) optsize {
-; GENERIC-LABEL: test_setcc:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: seto %dil # sched: [1:0.50]
-; GENERIC-NEXT: setno %dil # sched: [1:0.50]
-; GENERIC-NEXT: setb %dil # sched: [1:0.50]
-; GENERIC-NEXT: setae %dil # sched: [1:0.50]
-; GENERIC-NEXT: sete %dil # sched: [1:0.50]
-; GENERIC-NEXT: setne %dil # sched: [1:0.50]
-; GENERIC-NEXT: setbe %dil # sched: [2:1.00]
-; GENERIC-NEXT: seta %dil # sched: [2:1.00]
-; GENERIC-NEXT: sets %dil # sched: [1:0.50]
-; GENERIC-NEXT: setns %dil # sched: [1:0.50]
-; GENERIC-NEXT: setp %dil # sched: [1:0.50]
-; GENERIC-NEXT: setnp %dil # sched: [1:0.50]
-; GENERIC-NEXT: setl %dil # sched: [1:0.50]
-; GENERIC-NEXT: setge %dil # sched: [1:0.50]
-; GENERIC-NEXT: setle %dil # sched: [1:0.50]
-; GENERIC-NEXT: setg %dil # sched: [1:0.50]
-; GENERIC-NEXT: seto (%rsi) # sched: [2:1.00]
-; GENERIC-NEXT: setno (%rsi) # sched: [2:1.00]
-; GENERIC-NEXT: setb (%rsi) # sched: [2:1.00]
-; GENERIC-NEXT: setae (%rsi) # sched: [2:1.00]
-; GENERIC-NEXT: sete (%rsi) # sched: [2:1.00]
-; GENERIC-NEXT: setne (%rsi) # sched: [2:1.00]
-; GENERIC-NEXT: setbe (%rsi) # sched: [3:1.00]
-; GENERIC-NEXT: seta (%rsi) # sched: [3:1.00]
-; GENERIC-NEXT: sets (%rsi) # sched: [2:1.00]
-; GENERIC-NEXT: setns (%rsi) # sched: [2:1.00]
-; GENERIC-NEXT: setp (%rsi) # sched: [2:1.00]
-; GENERIC-NEXT: setnp (%rsi) # sched: [2:1.00]
-; GENERIC-NEXT: setl (%rsi) # sched: [2:1.00]
-; GENERIC-NEXT: setge (%rsi) # sched: [2:1.00]
-; GENERIC-NEXT: setle (%rsi) # sched: [2:1.00]
-; GENERIC-NEXT: setg (%rsi) # sched: [2:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_setcc:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: seto %dil # sched: [1:0.50]
-; ATOM-NEXT: setno %dil # sched: [1:0.50]
-; ATOM-NEXT: setb %dil # sched: [1:0.50]
-; ATOM-NEXT: setae %dil # sched: [1:0.50]
-; ATOM-NEXT: sete %dil # sched: [1:0.50]
-; ATOM-NEXT: setne %dil # sched: [1:0.50]
-; ATOM-NEXT: setbe %dil # sched: [1:0.50]
-; ATOM-NEXT: seta %dil # sched: [1:0.50]
-; ATOM-NEXT: sets %dil # sched: [1:0.50]
-; ATOM-NEXT: setns %dil # sched: [1:0.50]
-; ATOM-NEXT: setp %dil # sched: [1:0.50]
-; ATOM-NEXT: setnp %dil # sched: [1:0.50]
-; ATOM-NEXT: setl %dil # sched: [1:0.50]
-; ATOM-NEXT: setge %dil # sched: [1:0.50]
-; ATOM-NEXT: setle %dil # sched: [1:0.50]
-; ATOM-NEXT: setg %dil # sched: [1:0.50]
-; ATOM-NEXT: seto (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: setno (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: setb (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: setae (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: sete (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: setne (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: setbe (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: seta (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: sets (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: setns (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: setp (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: setnp (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: setl (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: setge (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: setle (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: setg (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_setcc:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: seto %dil # sched: [1:0.50]
-; SLM-NEXT: setno %dil # sched: [1:0.50]
-; SLM-NEXT: setb %dil # sched: [1:0.50]
-; SLM-NEXT: setae %dil # sched: [1:0.50]
-; SLM-NEXT: sete %dil # sched: [1:0.50]
-; SLM-NEXT: setne %dil # sched: [1:0.50]
-; SLM-NEXT: setbe %dil # sched: [1:0.50]
-; SLM-NEXT: seta %dil # sched: [1:0.50]
-; SLM-NEXT: sets %dil # sched: [1:0.50]
-; SLM-NEXT: setns %dil # sched: [1:0.50]
-; SLM-NEXT: setp %dil # sched: [1:0.50]
-; SLM-NEXT: setnp %dil # sched: [1:0.50]
-; SLM-NEXT: setl %dil # sched: [1:0.50]
-; SLM-NEXT: setge %dil # sched: [1:0.50]
-; SLM-NEXT: setle %dil # sched: [1:0.50]
-; SLM-NEXT: setg %dil # sched: [1:0.50]
-; SLM-NEXT: seto (%rsi) # sched: [1:1.00]
-; SLM-NEXT: setno (%rsi) # sched: [1:1.00]
-; SLM-NEXT: setb (%rsi) # sched: [1:1.00]
-; SLM-NEXT: setae (%rsi) # sched: [1:1.00]
-; SLM-NEXT: sete (%rsi) # sched: [1:1.00]
-; SLM-NEXT: setne (%rsi) # sched: [1:1.00]
-; SLM-NEXT: setbe (%rsi) # sched: [1:1.00]
-; SLM-NEXT: seta (%rsi) # sched: [1:1.00]
-; SLM-NEXT: sets (%rsi) # sched: [1:1.00]
-; SLM-NEXT: setns (%rsi) # sched: [1:1.00]
-; SLM-NEXT: setp (%rsi) # sched: [1:1.00]
-; SLM-NEXT: setnp (%rsi) # sched: [1:1.00]
-; SLM-NEXT: setl (%rsi) # sched: [1:1.00]
-; SLM-NEXT: setge (%rsi) # sched: [1:1.00]
-; SLM-NEXT: setle (%rsi) # sched: [1:1.00]
-; SLM-NEXT: setg (%rsi) # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_setcc:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: seto %dil # sched: [1:0.50]
-; SANDY-NEXT: setno %dil # sched: [1:0.50]
-; SANDY-NEXT: setb %dil # sched: [1:0.50]
-; SANDY-NEXT: setae %dil # sched: [1:0.50]
-; SANDY-NEXT: sete %dil # sched: [1:0.50]
-; SANDY-NEXT: setne %dil # sched: [1:0.50]
-; SANDY-NEXT: setbe %dil # sched: [2:1.00]
-; SANDY-NEXT: seta %dil # sched: [2:1.00]
-; SANDY-NEXT: sets %dil # sched: [1:0.50]
-; SANDY-NEXT: setns %dil # sched: [1:0.50]
-; SANDY-NEXT: setp %dil # sched: [1:0.50]
-; SANDY-NEXT: setnp %dil # sched: [1:0.50]
-; SANDY-NEXT: setl %dil # sched: [1:0.50]
-; SANDY-NEXT: setge %dil # sched: [1:0.50]
-; SANDY-NEXT: setle %dil # sched: [1:0.50]
-; SANDY-NEXT: setg %dil # sched: [1:0.50]
-; SANDY-NEXT: seto (%rsi) # sched: [2:1.00]
-; SANDY-NEXT: setno (%rsi) # sched: [2:1.00]
-; SANDY-NEXT: setb (%rsi) # sched: [2:1.00]
-; SANDY-NEXT: setae (%rsi) # sched: [2:1.00]
-; SANDY-NEXT: sete (%rsi) # sched: [2:1.00]
-; SANDY-NEXT: setne (%rsi) # sched: [2:1.00]
-; SANDY-NEXT: setbe (%rsi) # sched: [3:1.00]
-; SANDY-NEXT: seta (%rsi) # sched: [3:1.00]
-; SANDY-NEXT: sets (%rsi) # sched: [2:1.00]
-; SANDY-NEXT: setns (%rsi) # sched: [2:1.00]
-; SANDY-NEXT: setp (%rsi) # sched: [2:1.00]
-; SANDY-NEXT: setnp (%rsi) # sched: [2:1.00]
-; SANDY-NEXT: setl (%rsi) # sched: [2:1.00]
-; SANDY-NEXT: setge (%rsi) # sched: [2:1.00]
-; SANDY-NEXT: setle (%rsi) # sched: [2:1.00]
-; SANDY-NEXT: setg (%rsi) # sched: [2:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_setcc:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: seto %dil # sched: [1:0.50]
-; HASWELL-NEXT: setno %dil # sched: [1:0.50]
-; HASWELL-NEXT: setb %dil # sched: [1:0.50]
-; HASWELL-NEXT: setae %dil # sched: [1:0.50]
-; HASWELL-NEXT: sete %dil # sched: [1:0.50]
-; HASWELL-NEXT: setne %dil # sched: [1:0.50]
-; HASWELL-NEXT: setbe %dil # sched: [2:0.50]
-; HASWELL-NEXT: seta %dil # sched: [2:0.50]
-; HASWELL-NEXT: sets %dil # sched: [1:0.50]
-; HASWELL-NEXT: setns %dil # sched: [1:0.50]
-; HASWELL-NEXT: setp %dil # sched: [1:0.50]
-; HASWELL-NEXT: setnp %dil # sched: [1:0.50]
-; HASWELL-NEXT: setl %dil # sched: [1:0.50]
-; HASWELL-NEXT: setge %dil # sched: [1:0.50]
-; HASWELL-NEXT: setle %dil # sched: [1:0.50]
-; HASWELL-NEXT: setg %dil # sched: [1:0.50]
-; HASWELL-NEXT: seto (%rsi) # sched: [2:1.00]
-; HASWELL-NEXT: setno (%rsi) # sched: [2:1.00]
-; HASWELL-NEXT: setb (%rsi) # sched: [2:1.00]
-; HASWELL-NEXT: setae (%rsi) # sched: [2:1.00]
-; HASWELL-NEXT: sete (%rsi) # sched: [2:1.00]
-; HASWELL-NEXT: setne (%rsi) # sched: [2:1.00]
-; HASWELL-NEXT: setbe (%rsi) # sched: [3:1.00]
-; HASWELL-NEXT: seta (%rsi) # sched: [3:1.00]
-; HASWELL-NEXT: sets (%rsi) # sched: [2:1.00]
-; HASWELL-NEXT: setns (%rsi) # sched: [2:1.00]
-; HASWELL-NEXT: setp (%rsi) # sched: [2:1.00]
-; HASWELL-NEXT: setnp (%rsi) # sched: [2:1.00]
-; HASWELL-NEXT: setl (%rsi) # sched: [2:1.00]
-; HASWELL-NEXT: setge (%rsi) # sched: [2:1.00]
-; HASWELL-NEXT: setle (%rsi) # sched: [2:1.00]
-; HASWELL-NEXT: setg (%rsi) # sched: [2:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_setcc:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: seto %dil # sched: [1:0.50]
-; BROADWELL-NEXT: setno %dil # sched: [1:0.50]
-; BROADWELL-NEXT: setb %dil # sched: [1:0.50]
-; BROADWELL-NEXT: setae %dil # sched: [1:0.50]
-; BROADWELL-NEXT: sete %dil # sched: [1:0.50]
-; BROADWELL-NEXT: setne %dil # sched: [1:0.50]
-; BROADWELL-NEXT: setbe %dil # sched: [2:0.50]
-; BROADWELL-NEXT: seta %dil # sched: [2:0.50]
-; BROADWELL-NEXT: sets %dil # sched: [1:0.50]
-; BROADWELL-NEXT: setns %dil # sched: [1:0.50]
-; BROADWELL-NEXT: setp %dil # sched: [1:0.50]
-; BROADWELL-NEXT: setnp %dil # sched: [1:0.50]
-; BROADWELL-NEXT: setl %dil # sched: [1:0.50]
-; BROADWELL-NEXT: setge %dil # sched: [1:0.50]
-; BROADWELL-NEXT: setle %dil # sched: [1:0.50]
-; BROADWELL-NEXT: setg %dil # sched: [1:0.50]
-; BROADWELL-NEXT: seto (%rsi) # sched: [2:1.00]
-; BROADWELL-NEXT: setno (%rsi) # sched: [2:1.00]
-; BROADWELL-NEXT: setb (%rsi) # sched: [2:1.00]
-; BROADWELL-NEXT: setae (%rsi) # sched: [2:1.00]
-; BROADWELL-NEXT: sete (%rsi) # sched: [2:1.00]
-; BROADWELL-NEXT: setne (%rsi) # sched: [2:1.00]
-; BROADWELL-NEXT: setbe (%rsi) # sched: [3:1.00]
-; BROADWELL-NEXT: seta (%rsi) # sched: [3:1.00]
-; BROADWELL-NEXT: sets (%rsi) # sched: [2:1.00]
-; BROADWELL-NEXT: setns (%rsi) # sched: [2:1.00]
-; BROADWELL-NEXT: setp (%rsi) # sched: [2:1.00]
-; BROADWELL-NEXT: setnp (%rsi) # sched: [2:1.00]
-; BROADWELL-NEXT: setl (%rsi) # sched: [2:1.00]
-; BROADWELL-NEXT: setge (%rsi) # sched: [2:1.00]
-; BROADWELL-NEXT: setle (%rsi) # sched: [2:1.00]
-; BROADWELL-NEXT: setg (%rsi) # sched: [2:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_setcc:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: seto %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: setno %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: setb %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: setae %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: sete %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: setne %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: setbe %dil # sched: [2:1.00]
-; SKYLAKE-NEXT: seta %dil # sched: [2:1.00]
-; SKYLAKE-NEXT: sets %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: setns %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: setp %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: setnp %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: setl %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: setge %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: setle %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: setg %dil # sched: [1:0.50]
-; SKYLAKE-NEXT: seto (%rsi) # sched: [2:1.00]
-; SKYLAKE-NEXT: setno (%rsi) # sched: [2:1.00]
-; SKYLAKE-NEXT: setb (%rsi) # sched: [2:1.00]
-; SKYLAKE-NEXT: setae (%rsi) # sched: [2:1.00]
-; SKYLAKE-NEXT: sete (%rsi) # sched: [2:1.00]
-; SKYLAKE-NEXT: setne (%rsi) # sched: [2:1.00]
-; SKYLAKE-NEXT: setbe (%rsi) # sched: [3:1.00]
-; SKYLAKE-NEXT: seta (%rsi) # sched: [3:1.00]
-; SKYLAKE-NEXT: sets (%rsi) # sched: [2:1.00]
-; SKYLAKE-NEXT: setns (%rsi) # sched: [2:1.00]
-; SKYLAKE-NEXT: setp (%rsi) # sched: [2:1.00]
-; SKYLAKE-NEXT: setnp (%rsi) # sched: [2:1.00]
-; SKYLAKE-NEXT: setl (%rsi) # sched: [2:1.00]
-; SKYLAKE-NEXT: setge (%rsi) # sched: [2:1.00]
-; SKYLAKE-NEXT: setle (%rsi) # sched: [2:1.00]
-; SKYLAKE-NEXT: setg (%rsi) # sched: [2:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_setcc:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: seto %dil # sched: [1:0.50]
-; SKX-NEXT: setno %dil # sched: [1:0.50]
-; SKX-NEXT: setb %dil # sched: [1:0.50]
-; SKX-NEXT: setae %dil # sched: [1:0.50]
-; SKX-NEXT: sete %dil # sched: [1:0.50]
-; SKX-NEXT: setne %dil # sched: [1:0.50]
-; SKX-NEXT: setbe %dil # sched: [2:1.00]
-; SKX-NEXT: seta %dil # sched: [2:1.00]
-; SKX-NEXT: sets %dil # sched: [1:0.50]
-; SKX-NEXT: setns %dil # sched: [1:0.50]
-; SKX-NEXT: setp %dil # sched: [1:0.50]
-; SKX-NEXT: setnp %dil # sched: [1:0.50]
-; SKX-NEXT: setl %dil # sched: [1:0.50]
-; SKX-NEXT: setge %dil # sched: [1:0.50]
-; SKX-NEXT: setle %dil # sched: [1:0.50]
-; SKX-NEXT: setg %dil # sched: [1:0.50]
-; SKX-NEXT: seto (%rsi) # sched: [2:1.00]
-; SKX-NEXT: setno (%rsi) # sched: [2:1.00]
-; SKX-NEXT: setb (%rsi) # sched: [2:1.00]
-; SKX-NEXT: setae (%rsi) # sched: [2:1.00]
-; SKX-NEXT: sete (%rsi) # sched: [2:1.00]
-; SKX-NEXT: setne (%rsi) # sched: [2:1.00]
-; SKX-NEXT: setbe (%rsi) # sched: [3:1.00]
-; SKX-NEXT: seta (%rsi) # sched: [3:1.00]
-; SKX-NEXT: sets (%rsi) # sched: [2:1.00]
-; SKX-NEXT: setns (%rsi) # sched: [2:1.00]
-; SKX-NEXT: setp (%rsi) # sched: [2:1.00]
-; SKX-NEXT: setnp (%rsi) # sched: [2:1.00]
-; SKX-NEXT: setl (%rsi) # sched: [2:1.00]
-; SKX-NEXT: setge (%rsi) # sched: [2:1.00]
-; SKX-NEXT: setle (%rsi) # sched: [2:1.00]
-; SKX-NEXT: setg (%rsi) # sched: [2:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_setcc:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: seto %dil # sched: [1:0.50]
-; BDVER2-NEXT: setno %dil # sched: [1:0.50]
-; BDVER2-NEXT: setb %dil # sched: [1:0.50]
-; BDVER2-NEXT: setae %dil # sched: [1:0.50]
-; BDVER2-NEXT: sete %dil # sched: [1:0.50]
-; BDVER2-NEXT: setne %dil # sched: [1:0.50]
-; BDVER2-NEXT: setbe %dil # sched: [1:0.50]
-; BDVER2-NEXT: seta %dil # sched: [1:0.50]
-; BDVER2-NEXT: sets %dil # sched: [1:0.50]
-; BDVER2-NEXT: setns %dil # sched: [1:0.50]
-; BDVER2-NEXT: setp %dil # sched: [1:0.50]
-; BDVER2-NEXT: setnp %dil # sched: [1:0.50]
-; BDVER2-NEXT: setl %dil # sched: [1:0.50]
-; BDVER2-NEXT: setge %dil # sched: [1:0.50]
-; BDVER2-NEXT: setle %dil # sched: [1:0.50]
-; BDVER2-NEXT: setg %dil # sched: [1:0.50]
-; BDVER2-NEXT: seto (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: setno (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: setb (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: setae (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: sete (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: setne (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: setbe (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: seta (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: sets (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: setns (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: setp (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: setnp (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: setl (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: setge (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: setle (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: setg (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_setcc:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: seto %dil # sched: [1:0.50]
-; BTVER2-NEXT: setno %dil # sched: [1:0.50]
-; BTVER2-NEXT: setb %dil # sched: [1:0.50]
-; BTVER2-NEXT: setae %dil # sched: [1:0.50]
-; BTVER2-NEXT: sete %dil # sched: [1:0.50]
-; BTVER2-NEXT: setne %dil # sched: [1:0.50]
-; BTVER2-NEXT: setbe %dil # sched: [1:0.50]
-; BTVER2-NEXT: seta %dil # sched: [1:0.50]
-; BTVER2-NEXT: sets %dil # sched: [1:0.50]
-; BTVER2-NEXT: setns %dil # sched: [1:0.50]
-; BTVER2-NEXT: setp %dil # sched: [1:0.50]
-; BTVER2-NEXT: setnp %dil # sched: [1:0.50]
-; BTVER2-NEXT: setl %dil # sched: [1:0.50]
-; BTVER2-NEXT: setge %dil # sched: [1:0.50]
-; BTVER2-NEXT: setle %dil # sched: [1:0.50]
-; BTVER2-NEXT: setg %dil # sched: [1:0.50]
-; BTVER2-NEXT: seto (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: setno (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: setb (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: setae (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: sete (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: setne (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: setbe (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: seta (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: sets (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: setns (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: setp (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: setnp (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: setl (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: setge (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: setle (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: setg (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_setcc:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: seto %dil # sched: [1:0.25]
-; ZNVER1-NEXT: setno %dil # sched: [1:0.25]
-; ZNVER1-NEXT: setb %dil # sched: [1:0.25]
-; ZNVER1-NEXT: setae %dil # sched: [1:0.25]
-; ZNVER1-NEXT: sete %dil # sched: [1:0.25]
-; ZNVER1-NEXT: setne %dil # sched: [1:0.25]
-; ZNVER1-NEXT: setbe %dil # sched: [1:0.25]
-; ZNVER1-NEXT: seta %dil # sched: [1:0.25]
-; ZNVER1-NEXT: sets %dil # sched: [1:0.25]
-; ZNVER1-NEXT: setns %dil # sched: [1:0.25]
-; ZNVER1-NEXT: setp %dil # sched: [1:0.25]
-; ZNVER1-NEXT: setnp %dil # sched: [1:0.25]
-; ZNVER1-NEXT: setl %dil # sched: [1:0.25]
-; ZNVER1-NEXT: setge %dil # sched: [1:0.25]
-; ZNVER1-NEXT: setle %dil # sched: [1:0.25]
-; ZNVER1-NEXT: setg %dil # sched: [1:0.25]
-; ZNVER1-NEXT: seto (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: setno (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: setb (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: setae (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: sete (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: setne (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: setbe (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: seta (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: sets (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: setns (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: setp (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: setnp (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: setl (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: setge (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: setle (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: setg (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "seto $0 \0A\09 setno $0 \0A\09 setb $0 \0A\09 setnb $0 \0A\09 setz $0 \0A\09 setnz $0 \0A\09 setbe $0 \0A\09 setnbe $0 \0A\09 sets $0 \0A\09 setns $0 \0A\09 setp $0 \0A\09 setnp $0 \0A\09 setl $0 \0A\09 setnl $0 \0A\09 setle $0 \0A\09 setnle $0 \0A\09 seto $1 \0A\09 setno $1 \0A\09 setb $1 \0A\09 setnb $1 \0A\09 setz $1 \0A\09 setnz $1 \0A\09 setbe $1 \0A\09 setnbe $1 \0A\09 sets $1 \0A\09 setns $1 \0A\09 setp $1 \0A\09 setnp $1 \0A\09 setl $1 \0A\09 setnl $1 \0A\09 setle $1 \0A\09 setnle $1", "r,*m"(i8 %a0, i8 *%a1)
- ret void
-}
-
-; TODO - test_sgdt
-
-define void @test_shld_shrd_16(i16 %a0, i16 %a1, i16 *%a2) optsize {
-; GENERIC-LABEL: test_shld_shrd_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: shldw %cl, %si, %di # sched: [4:1.50]
-; GENERIC-NEXT: shrdw %cl, %si, %di # sched: [4:1.50]
-; GENERIC-NEXT: shldw %cl, %si, (%rdx) # sched: [10:1.50]
-; GENERIC-NEXT: shrdw %cl, %si, (%rdx) # sched: [10:1.50]
-; GENERIC-NEXT: shldw $7, %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: shrdw $7, %si, %di # sched: [2:0.67]
-; GENERIC-NEXT: shldw $7, %si, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: shrdw $7, %si, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_shld_shrd_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: shldw %cl, %si, %di # sched: [6:3.00]
-; ATOM-NEXT: shrdw %cl, %si, %di # sched: [6:3.00]
-; ATOM-NEXT: shldw %cl, %si, (%rdx) # sched: [6:3.00]
-; ATOM-NEXT: shrdw %cl, %si, (%rdx) # sched: [6:3.00]
-; ATOM-NEXT: shldw $7, %si, %di # sched: [6:3.00]
-; ATOM-NEXT: shrdw $7, %si, %di # sched: [6:3.00]
-; ATOM-NEXT: shldw $7, %si, (%rdx) # sched: [6:3.00]
-; ATOM-NEXT: shrdw $7, %si, (%rdx) # sched: [6:3.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_shld_shrd_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: shldw %cl, %si, %di # sched: [1:1.00]
-; SLM-NEXT: shrdw %cl, %si, %di # sched: [1:1.00]
-; SLM-NEXT: shldw %cl, %si, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrdw %cl, %si, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shldw $7, %si, %di # sched: [1:1.00]
-; SLM-NEXT: shrdw $7, %si, %di # sched: [1:1.00]
-; SLM-NEXT: shldw $7, %si, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrdw $7, %si, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_shld_shrd_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: shldw %cl, %si, %di # sched: [4:1.50]
-; SANDY-NEXT: shrdw %cl, %si, %di # sched: [4:1.50]
-; SANDY-NEXT: shldw %cl, %si, (%rdx) # sched: [10:1.50]
-; SANDY-NEXT: shrdw %cl, %si, (%rdx) # sched: [10:1.50]
-; SANDY-NEXT: shldw $7, %si, %di # sched: [2:0.67]
-; SANDY-NEXT: shrdw $7, %si, %di # sched: [2:0.67]
-; SANDY-NEXT: shldw $7, %si, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: shrdw $7, %si, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_shld_shrd_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: shldw %cl, %si, %di # sched: [6:1.00]
-; HASWELL-NEXT: shrdw %cl, %si, %di # sched: [6:1.00]
-; HASWELL-NEXT: shldw %cl, %si, (%rdx) # sched: [12:1.00]
-; HASWELL-NEXT: shrdw %cl, %si, (%rdx) # sched: [12:1.00]
-; HASWELL-NEXT: shldw $7, %si, %di # sched: [3:1.00]
-; HASWELL-NEXT: shrdw $7, %si, %di # sched: [3:1.00]
-; HASWELL-NEXT: shldw $7, %si, (%rdx) # sched: [10:1.00]
-; HASWELL-NEXT: shrdw $7, %si, (%rdx) # sched: [10:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_shld_shrd_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: shldw %cl, %si, %di # sched: [6:1.00]
-; BROADWELL-NEXT: shrdw %cl, %si, %di # sched: [6:1.00]
-; BROADWELL-NEXT: shldw %cl, %si, (%rdx) # sched: [11:1.00]
-; BROADWELL-NEXT: shrdw %cl, %si, (%rdx) # sched: [11:1.00]
-; BROADWELL-NEXT: shldw $7, %si, %di # sched: [3:1.00]
-; BROADWELL-NEXT: shrdw $7, %si, %di # sched: [3:1.00]
-; BROADWELL-NEXT: shldw $7, %si, (%rdx) # sched: [9:1.00]
-; BROADWELL-NEXT: shrdw $7, %si, (%rdx) # sched: [9:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_shld_shrd_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: shldw %cl, %si, %di # sched: [6:1.00]
-; SKYLAKE-NEXT: shrdw %cl, %si, %di # sched: [6:1.00]
-; SKYLAKE-NEXT: shldw %cl, %si, (%rdx) # sched: [11:1.00]
-; SKYLAKE-NEXT: shrdw %cl, %si, (%rdx) # sched: [11:1.00]
-; SKYLAKE-NEXT: shldw $7, %si, %di # sched: [3:1.00]
-; SKYLAKE-NEXT: shrdw $7, %si, %di # sched: [3:1.00]
-; SKYLAKE-NEXT: shldw $7, %si, (%rdx) # sched: [9:1.00]
-; SKYLAKE-NEXT: shrdw $7, %si, (%rdx) # sched: [9:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_shld_shrd_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: shldw %cl, %si, %di # sched: [6:1.00]
-; SKX-NEXT: shrdw %cl, %si, %di # sched: [6:1.00]
-; SKX-NEXT: shldw %cl, %si, (%rdx) # sched: [11:1.00]
-; SKX-NEXT: shrdw %cl, %si, (%rdx) # sched: [11:1.00]
-; SKX-NEXT: shldw $7, %si, %di # sched: [3:1.00]
-; SKX-NEXT: shrdw $7, %si, %di # sched: [3:1.00]
-; SKX-NEXT: shldw $7, %si, (%rdx) # sched: [9:1.00]
-; SKX-NEXT: shrdw $7, %si, (%rdx) # sched: [9:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_shld_shrd_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: shldw %cl, %si, %di # sched: [4:4.00]
-; BDVER2-NEXT: shrdw %cl, %si, %di # sched: [4:4.00]
-; BDVER2-NEXT: shldw %cl, %si, (%rdx) # sched: [4:11.00]
-; BDVER2-NEXT: shrdw %cl, %si, (%rdx) # sched: [4:11.00]
-; BDVER2-NEXT: shldw $7, %si, %di # sched: [4:3.00]
-; BDVER2-NEXT: shrdw $7, %si, %di # sched: [3:3.00]
-; BDVER2-NEXT: shldw $7, %si, (%rdx) # sched: [4:11.00]
-; BDVER2-NEXT: shrdw $7, %si, (%rdx) # sched: [4:11.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_shld_shrd_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: shldw %cl, %si, %di # sched: [4:4.00]
-; BTVER2-NEXT: shrdw %cl, %si, %di # sched: [4:4.00]
-; BTVER2-NEXT: shldw %cl, %si, (%rdx) # sched: [9:11.00]
-; BTVER2-NEXT: shrdw %cl, %si, (%rdx) # sched: [9:11.00]
-; BTVER2-NEXT: shldw $7, %si, %di # sched: [3:3.00]
-; BTVER2-NEXT: shrdw $7, %si, %di # sched: [3:3.00]
-; BTVER2-NEXT: shldw $7, %si, (%rdx) # sched: [9:11.00]
-; BTVER2-NEXT: shrdw $7, %si, (%rdx) # sched: [9:11.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_shld_shrd_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: shldw %cl, %si, %di # sched: [100:0.25]
-; ZNVER1-NEXT: shrdw %cl, %si, %di # sched: [100:0.25]
-; ZNVER1-NEXT: shldw %cl, %si, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: shrdw %cl, %si, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: shldw $7, %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: shrdw $7, %si, %di # sched: [1:0.25]
-; ZNVER1-NEXT: shldw $7, %si, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shrdw $7, %si, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "shld $1, $0 \0A\09 shrd $1, $0 \0A\09 shld $1, $2 \0A\09 shrd $1, $2 \0A\09 shld $3, $1, $0 \0A\09 shrd $3, $1, $0 \0A\09 shld $3, $1, $2 \0A\09 shrd $3, $1, $2", "r,r,*m,i"(i16 %a0, i16 %a1, i16 *%a2, i8 7)
- ret void
-}
-define void @test_shld_shrd_32(i32 %a0, i32 %a1, i32 *%a2) optsize {
-; GENERIC-LABEL: test_shld_shrd_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: shldl %cl, %esi, %edi # sched: [4:1.50]
-; GENERIC-NEXT: shrdl %cl, %esi, %edi # sched: [4:1.50]
-; GENERIC-NEXT: shldl %cl, %esi, (%rdx) # sched: [10:1.50]
-; GENERIC-NEXT: shrdl %cl, %esi, (%rdx) # sched: [10:1.50]
-; GENERIC-NEXT: shldl $7, %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: shrdl $7, %esi, %edi # sched: [2:0.67]
-; GENERIC-NEXT: shldl $7, %esi, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: shrdl $7, %esi, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_shld_shrd_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: shldl %cl, %esi, %edi # sched: [2:1.00]
-; ATOM-NEXT: shrdl %cl, %esi, %edi # sched: [2:1.00]
-; ATOM-NEXT: shldl %cl, %esi, (%rdx) # sched: [4:2.00]
-; ATOM-NEXT: shrdl %cl, %esi, (%rdx) # sched: [4:2.00]
-; ATOM-NEXT: shldl $7, %esi, %edi # sched: [2:1.00]
-; ATOM-NEXT: shrdl $7, %esi, %edi # sched: [2:1.00]
-; ATOM-NEXT: shldl $7, %esi, (%rdx) # sched: [4:2.00]
-; ATOM-NEXT: shrdl $7, %esi, (%rdx) # sched: [4:2.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_shld_shrd_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: shldl %cl, %esi, %edi # sched: [1:1.00]
-; SLM-NEXT: shrdl %cl, %esi, %edi # sched: [1:1.00]
-; SLM-NEXT: shldl %cl, %esi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrdl %cl, %esi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shldl $7, %esi, %edi # sched: [1:1.00]
-; SLM-NEXT: shrdl $7, %esi, %edi # sched: [1:1.00]
-; SLM-NEXT: shldl $7, %esi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrdl $7, %esi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_shld_shrd_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: shldl %cl, %esi, %edi # sched: [4:1.50]
-; SANDY-NEXT: shrdl %cl, %esi, %edi # sched: [4:1.50]
-; SANDY-NEXT: shldl %cl, %esi, (%rdx) # sched: [10:1.50]
-; SANDY-NEXT: shrdl %cl, %esi, (%rdx) # sched: [10:1.50]
-; SANDY-NEXT: shldl $7, %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: shrdl $7, %esi, %edi # sched: [2:0.67]
-; SANDY-NEXT: shldl $7, %esi, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: shrdl $7, %esi, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_shld_shrd_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: shldl %cl, %esi, %edi # sched: [6:1.00]
-; HASWELL-NEXT: shrdl %cl, %esi, %edi # sched: [6:1.00]
-; HASWELL-NEXT: shldl %cl, %esi, (%rdx) # sched: [12:1.00]
-; HASWELL-NEXT: shrdl %cl, %esi, (%rdx) # sched: [12:1.00]
-; HASWELL-NEXT: shldl $7, %esi, %edi # sched: [3:1.00]
-; HASWELL-NEXT: shrdl $7, %esi, %edi # sched: [3:1.00]
-; HASWELL-NEXT: shldl $7, %esi, (%rdx) # sched: [10:1.00]
-; HASWELL-NEXT: shrdl $7, %esi, (%rdx) # sched: [10:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_shld_shrd_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: shldl %cl, %esi, %edi # sched: [6:1.00]
-; BROADWELL-NEXT: shrdl %cl, %esi, %edi # sched: [6:1.00]
-; BROADWELL-NEXT: shldl %cl, %esi, (%rdx) # sched: [11:1.00]
-; BROADWELL-NEXT: shrdl %cl, %esi, (%rdx) # sched: [11:1.00]
-; BROADWELL-NEXT: shldl $7, %esi, %edi # sched: [3:1.00]
-; BROADWELL-NEXT: shrdl $7, %esi, %edi # sched: [3:1.00]
-; BROADWELL-NEXT: shldl $7, %esi, (%rdx) # sched: [9:1.00]
-; BROADWELL-NEXT: shrdl $7, %esi, (%rdx) # sched: [9:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_shld_shrd_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: shldl %cl, %esi, %edi # sched: [6:1.00]
-; SKYLAKE-NEXT: shrdl %cl, %esi, %edi # sched: [6:1.00]
-; SKYLAKE-NEXT: shldl %cl, %esi, (%rdx) # sched: [11:1.00]
-; SKYLAKE-NEXT: shrdl %cl, %esi, (%rdx) # sched: [11:1.00]
-; SKYLAKE-NEXT: shldl $7, %esi, %edi # sched: [3:1.00]
-; SKYLAKE-NEXT: shrdl $7, %esi, %edi # sched: [3:1.00]
-; SKYLAKE-NEXT: shldl $7, %esi, (%rdx) # sched: [9:1.00]
-; SKYLAKE-NEXT: shrdl $7, %esi, (%rdx) # sched: [9:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_shld_shrd_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: shldl %cl, %esi, %edi # sched: [6:1.00]
-; SKX-NEXT: shrdl %cl, %esi, %edi # sched: [6:1.00]
-; SKX-NEXT: shldl %cl, %esi, (%rdx) # sched: [11:1.00]
-; SKX-NEXT: shrdl %cl, %esi, (%rdx) # sched: [11:1.00]
-; SKX-NEXT: shldl $7, %esi, %edi # sched: [3:1.00]
-; SKX-NEXT: shrdl $7, %esi, %edi # sched: [3:1.00]
-; SKX-NEXT: shldl $7, %esi, (%rdx) # sched: [9:1.00]
-; SKX-NEXT: shrdl $7, %esi, (%rdx) # sched: [9:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_shld_shrd_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: shldl %cl, %esi, %edi # sched: [4:4.00]
-; BDVER2-NEXT: shrdl %cl, %esi, %edi # sched: [4:4.00]
-; BDVER2-NEXT: shldl %cl, %esi, (%rdx) # sched: [4:11.00]
-; BDVER2-NEXT: shrdl %cl, %esi, (%rdx) # sched: [4:11.00]
-; BDVER2-NEXT: shldl $7, %esi, %edi # sched: [3:3.00]
-; BDVER2-NEXT: shrdl $7, %esi, %edi # sched: [4:3.00]
-; BDVER2-NEXT: shldl $7, %esi, (%rdx) # sched: [4:11.00]
-; BDVER2-NEXT: shrdl $7, %esi, (%rdx) # sched: [4:11.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_shld_shrd_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: shldl %cl, %esi, %edi # sched: [4:4.00]
-; BTVER2-NEXT: shrdl %cl, %esi, %edi # sched: [4:4.00]
-; BTVER2-NEXT: shldl %cl, %esi, (%rdx) # sched: [9:11.00]
-; BTVER2-NEXT: shrdl %cl, %esi, (%rdx) # sched: [9:11.00]
-; BTVER2-NEXT: shldl $7, %esi, %edi # sched: [3:3.00]
-; BTVER2-NEXT: shrdl $7, %esi, %edi # sched: [3:3.00]
-; BTVER2-NEXT: shldl $7, %esi, (%rdx) # sched: [9:11.00]
-; BTVER2-NEXT: shrdl $7, %esi, (%rdx) # sched: [9:11.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_shld_shrd_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: shldl %cl, %esi, %edi # sched: [100:0.25]
-; ZNVER1-NEXT: shrdl %cl, %esi, %edi # sched: [100:0.25]
-; ZNVER1-NEXT: shldl %cl, %esi, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: shrdl %cl, %esi, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: shldl $7, %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: shrdl $7, %esi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: shldl $7, %esi, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shrdl $7, %esi, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "shld $1, $0 \0A\09 shrd $1, $0 \0A\09 shld $1, $2 \0A\09 shrd $1, $2 \0A\09 shld $3, $1, $0 \0A\09 shrd $3, $1, $0 \0A\09 shld $3, $1, $2 \0A\09 shrd $3, $1, $2", "r,r,*m,i"(i32 %a0, i32 %a1, i32 *%a2, i8 7)
- ret void
-}
-define void @test_shld_shrd_64(i64 %a0, i64 %a1, i64 *%a2) optsize {
-; GENERIC-LABEL: test_shld_shrd_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: shldq %cl, %rsi, %rdi # sched: [4:1.50]
-; GENERIC-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:1.50]
-; GENERIC-NEXT: shldq %cl, %rsi, (%rdx) # sched: [10:1.50]
-; GENERIC-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [10:1.50]
-; GENERIC-NEXT: shldq $7, %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: shrdq $7, %rsi, %rdi # sched: [2:0.67]
-; GENERIC-NEXT: shldq $7, %rsi, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: shrdq $7, %rsi, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_shld_shrd_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: shldq %cl, %rsi, %rdi # sched: [8:4.00]
-; ATOM-NEXT: shrdq %cl, %rsi, %rdi # sched: [8:4.00]
-; ATOM-NEXT: shldq %cl, %rsi, (%rdx) # sched: [9:4.50]
-; ATOM-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [9:4.50]
-; ATOM-NEXT: shldq $7, %rsi, %rdi # sched: [9:4.50]
-; ATOM-NEXT: shrdq $7, %rsi, %rdi # sched: [9:4.50]
-; ATOM-NEXT: shldq $7, %rsi, (%rdx) # sched: [9:4.50]
-; ATOM-NEXT: shrdq $7, %rsi, (%rdx) # sched: [9:4.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_shld_shrd_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: shldq %cl, %rsi, %rdi # sched: [1:1.00]
-; SLM-NEXT: shrdq %cl, %rsi, %rdi # sched: [1:1.00]
-; SLM-NEXT: shldq %cl, %rsi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shldq $7, %rsi, %rdi # sched: [1:1.00]
-; SLM-NEXT: shrdq $7, %rsi, %rdi # sched: [1:1.00]
-; SLM-NEXT: shldq $7, %rsi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: shrdq $7, %rsi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_shld_shrd_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: shldq %cl, %rsi, %rdi # sched: [4:1.50]
-; SANDY-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:1.50]
-; SANDY-NEXT: shldq %cl, %rsi, (%rdx) # sched: [10:1.50]
-; SANDY-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [10:1.50]
-; SANDY-NEXT: shldq $7, %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: shrdq $7, %rsi, %rdi # sched: [2:0.67]
-; SANDY-NEXT: shldq $7, %rsi, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: shrdq $7, %rsi, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_shld_shrd_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: shldq %cl, %rsi, %rdi # sched: [6:1.00]
-; HASWELL-NEXT: shrdq %cl, %rsi, %rdi # sched: [6:1.00]
-; HASWELL-NEXT: shldq %cl, %rsi, (%rdx) # sched: [12:1.00]
-; HASWELL-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [12:1.00]
-; HASWELL-NEXT: shldq $7, %rsi, %rdi # sched: [3:1.00]
-; HASWELL-NEXT: shrdq $7, %rsi, %rdi # sched: [3:1.00]
-; HASWELL-NEXT: shldq $7, %rsi, (%rdx) # sched: [10:1.00]
-; HASWELL-NEXT: shrdq $7, %rsi, (%rdx) # sched: [10:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_shld_shrd_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: shldq %cl, %rsi, %rdi # sched: [6:1.00]
-; BROADWELL-NEXT: shrdq %cl, %rsi, %rdi # sched: [6:1.00]
-; BROADWELL-NEXT: shldq %cl, %rsi, (%rdx) # sched: [11:1.00]
-; BROADWELL-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [11:1.00]
-; BROADWELL-NEXT: shldq $7, %rsi, %rdi # sched: [3:1.00]
-; BROADWELL-NEXT: shrdq $7, %rsi, %rdi # sched: [3:1.00]
-; BROADWELL-NEXT: shldq $7, %rsi, (%rdx) # sched: [9:1.00]
-; BROADWELL-NEXT: shrdq $7, %rsi, (%rdx) # sched: [9:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_shld_shrd_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: shldq %cl, %rsi, %rdi # sched: [6:1.00]
-; SKYLAKE-NEXT: shrdq %cl, %rsi, %rdi # sched: [6:1.00]
-; SKYLAKE-NEXT: shldq %cl, %rsi, (%rdx) # sched: [11:1.00]
-; SKYLAKE-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [11:1.00]
-; SKYLAKE-NEXT: shldq $7, %rsi, %rdi # sched: [3:1.00]
-; SKYLAKE-NEXT: shrdq $7, %rsi, %rdi # sched: [3:1.00]
-; SKYLAKE-NEXT: shldq $7, %rsi, (%rdx) # sched: [9:1.00]
-; SKYLAKE-NEXT: shrdq $7, %rsi, (%rdx) # sched: [9:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_shld_shrd_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: shldq %cl, %rsi, %rdi # sched: [6:1.00]
-; SKX-NEXT: shrdq %cl, %rsi, %rdi # sched: [6:1.00]
-; SKX-NEXT: shldq %cl, %rsi, (%rdx) # sched: [11:1.00]
-; SKX-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [11:1.00]
-; SKX-NEXT: shldq $7, %rsi, %rdi # sched: [3:1.00]
-; SKX-NEXT: shrdq $7, %rsi, %rdi # sched: [3:1.00]
-; SKX-NEXT: shldq $7, %rsi, (%rdx) # sched: [9:1.00]
-; SKX-NEXT: shrdq $7, %rsi, (%rdx) # sched: [9:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_shld_shrd_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: shldq %cl, %rsi, %rdi # sched: [4:4.00]
-; BDVER2-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:4.00]
-; BDVER2-NEXT: shldq %cl, %rsi, (%rdx) # sched: [4:11.00]
-; BDVER2-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [4:11.00]
-; BDVER2-NEXT: shldq $7, %rsi, %rdi # sched: [4:3.00]
-; BDVER2-NEXT: shrdq $7, %rsi, %rdi # sched: [4:3.00]
-; BDVER2-NEXT: shldq $7, %rsi, (%rdx) # sched: [4:11.00]
-; BDVER2-NEXT: shrdq $7, %rsi, (%rdx) # sched: [4:11.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_shld_shrd_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: shldq %cl, %rsi, %rdi # sched: [4:4.00]
-; BTVER2-NEXT: shrdq %cl, %rsi, %rdi # sched: [4:4.00]
-; BTVER2-NEXT: shldq %cl, %rsi, (%rdx) # sched: [9:11.00]
-; BTVER2-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [9:11.00]
-; BTVER2-NEXT: shldq $7, %rsi, %rdi # sched: [3:3.00]
-; BTVER2-NEXT: shrdq $7, %rsi, %rdi # sched: [3:3.00]
-; BTVER2-NEXT: shldq $7, %rsi, (%rdx) # sched: [9:11.00]
-; BTVER2-NEXT: shrdq $7, %rsi, (%rdx) # sched: [9:11.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_shld_shrd_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: shldq %cl, %rsi, %rdi # sched: [100:0.25]
-; ZNVER1-NEXT: shrdq %cl, %rsi, %rdi # sched: [100:0.25]
-; ZNVER1-NEXT: shldq %cl, %rsi, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: shrdq %cl, %rsi, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: shldq $7, %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: shrdq $7, %rsi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: shldq $7, %rsi, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: shrdq $7, %rsi, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "shld $1, $0 \0A\09 shrd $1, $0 \0A\09 shld $1, $2 \0A\09 shrd $1, $2 \0A\09 shld $3, $1, $0 \0A\09 shrd $3, $1, $0 \0A\09 shld $3, $1, $2 \0A\09 shrd $3, $1, $2", "r,r,*m,i"(i64 %a0, i64 %a1, i64 *%a2, i8 7)
- ret void
-}
-
-; TODO - test_sidt
-; TODO - test_sldt
-; TODO - test_smsw
-
-define void @test_stc_std() optsize {
-; GENERIC-LABEL: test_stc_std:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: stc # sched: [1:0.33]
-; GENERIC-NEXT: std # sched: [1:0.33]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_stc_std:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: stc # sched: [1:0.50]
-; ATOM-NEXT: std # sched: [21:10.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_stc_std:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: stc # sched: [1:0.50]
-; SLM-NEXT: std # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_stc_std:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: stc # sched: [1:0.33]
-; SANDY-NEXT: std # sched: [1:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_stc_std:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: stc # sched: [1:0.25]
-; HASWELL-NEXT: std # sched: [6:1.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_stc_std:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: stc # sched: [1:0.25]
-; BROADWELL-NEXT: std # sched: [6:1.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_stc_std:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: stc # sched: [1:0.25]
-; SKYLAKE-NEXT: std # sched: [6:1.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_stc_std:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: stc # sched: [1:0.25]
-; SKX-NEXT: std # sched: [6:1.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_stc_std:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: stc # sched: [1:0.50]
-; BDVER2-NEXT: std # sched: [1:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_stc_std:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: stc # sched: [1:0.50]
-; BTVER2-NEXT: std # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_stc_std:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: stc # sched: [1:0.25]
-; ZNVER1-NEXT: std # sched: [1:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "stc \0A\09 std", ""()
- ret void
-}
-
-; TODO - test_sti
-; TODO - test_stgi
-
-define void @test_stos() optsize {
-; GENERIC-LABEL: test_stos:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: stosb %al, %es:(%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: stosw %ax, %es:(%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: stosl %eax, %es:(%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: stosq %rax, %es:(%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_stos:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: stosb %al, %es:(%rdi) # sched: [1:0.50]
-; ATOM-NEXT: stosw %ax, %es:(%rdi) # sched: [1:0.50]
-; ATOM-NEXT: stosl %eax, %es:(%rdi) # sched: [1:0.50]
-; ATOM-NEXT: stosq %rax, %es:(%rdi) # sched: [1:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_stos:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: stosb %al, %es:(%rdi) # sched: [100:1.00]
-; SLM-NEXT: stosw %ax, %es:(%rdi) # sched: [100:1.00]
-; SLM-NEXT: stosl %eax, %es:(%rdi) # sched: [100:1.00]
-; SLM-NEXT: stosq %rax, %es:(%rdi) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_stos:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: stosb %al, %es:(%rdi) # sched: [5:1.00]
-; SANDY-NEXT: stosw %ax, %es:(%rdi) # sched: [5:1.00]
-; SANDY-NEXT: stosl %eax, %es:(%rdi) # sched: [5:1.00]
-; SANDY-NEXT: stosq %rax, %es:(%rdi) # sched: [5:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_stos:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: stosb %al, %es:(%rdi) # sched: [2:1.00]
-; HASWELL-NEXT: stosw %ax, %es:(%rdi) # sched: [2:1.00]
-; HASWELL-NEXT: stosl %eax, %es:(%rdi) # sched: [2:1.00]
-; HASWELL-NEXT: stosq %rax, %es:(%rdi) # sched: [2:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_stos:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: stosb %al, %es:(%rdi) # sched: [2:1.00]
-; BROADWELL-NEXT: stosw %ax, %es:(%rdi) # sched: [2:1.00]
-; BROADWELL-NEXT: stosl %eax, %es:(%rdi) # sched: [2:1.00]
-; BROADWELL-NEXT: stosq %rax, %es:(%rdi) # sched: [2:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_stos:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: stosb %al, %es:(%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: stosw %ax, %es:(%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: stosl %eax, %es:(%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: stosq %rax, %es:(%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_stos:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: stosb %al, %es:(%rdi) # sched: [2:1.00]
-; SKX-NEXT: stosw %ax, %es:(%rdi) # sched: [2:1.00]
-; SKX-NEXT: stosl %eax, %es:(%rdi) # sched: [2:1.00]
-; SKX-NEXT: stosq %rax, %es:(%rdi) # sched: [2:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_stos:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: stosb %al, %es:(%rdi) # sched: [100:0.50]
-; BDVER2-NEXT: stosw %ax, %es:(%rdi) # sched: [100:0.50]
-; BDVER2-NEXT: stosl %eax, %es:(%rdi) # sched: [100:0.50]
-; BDVER2-NEXT: stosq %rax, %es:(%rdi) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_stos:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: stosb %al, %es:(%rdi) # sched: [100:0.50]
-; BTVER2-NEXT: stosw %ax, %es:(%rdi) # sched: [100:0.50]
-; BTVER2-NEXT: stosl %eax, %es:(%rdi) # sched: [100:0.50]
-; BTVER2-NEXT: stosq %rax, %es:(%rdi) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_stos:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: stosb %al, %es:(%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: stosw %ax, %es:(%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: stosl %eax, %es:(%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: stosq %rax, %es:(%rdi) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "stosb \0A\09 stosw \0A\09 stosl \0A\09 stosq", ""()
- ret void
-}
-
-; TODO - test_str
-
-define void @test_sub_8(i8 %a0, i8* %a1, i8 %a2) optsize {
-; GENERIC-LABEL: test_sub_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: subb $7, %al # sched: [1:0.33]
-; GENERIC-NEXT: subb $7, %dil # sched: [1:0.33]
-; GENERIC-NEXT: subb $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: subb %dl, %dil # sched: [1:0.33]
-; GENERIC-NEXT: subb %dil, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: subb (%rsi), %dil # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sub_8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: subb $7, %al # sched: [1:0.50]
-; ATOM-NEXT: subb $7, %dil # sched: [1:0.50]
-; ATOM-NEXT: subb $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: subb %dl, %dil # sched: [1:0.50]
-; ATOM-NEXT: subb %dil, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: subb (%rsi), %dil # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sub_8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: subb $7, %al # sched: [1:0.50]
-; SLM-NEXT: subb $7, %dil # sched: [1:0.50]
-; SLM-NEXT: subb $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: subb %dl, %dil # sched: [1:0.50]
-; SLM-NEXT: subb %dil, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: subb (%rsi), %dil # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_sub_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: subb $7, %al # sched: [1:0.33]
-; SANDY-NEXT: subb $7, %dil # sched: [1:0.33]
-; SANDY-NEXT: subb $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: subb %dl, %dil # sched: [1:0.33]
-; SANDY-NEXT: subb %dil, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: subb (%rsi), %dil # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_sub_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: subb $7, %al # sched: [1:0.25]
-; HASWELL-NEXT: subb $7, %dil # sched: [1:0.25]
-; HASWELL-NEXT: subb $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: subb %dl, %dil # sched: [1:0.25]
-; HASWELL-NEXT: subb %dil, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: subb (%rsi), %dil # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sub_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: subb $7, %al # sched: [1:0.25]
-; BROADWELL-NEXT: subb $7, %dil # sched: [1:0.25]
-; BROADWELL-NEXT: subb $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: subb %dl, %dil # sched: [1:0.25]
-; BROADWELL-NEXT: subb %dil, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: subb (%rsi), %dil # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sub_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: subb $7, %al # sched: [1:0.25]
-; SKYLAKE-NEXT: subb $7, %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: subb $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: subb %dl, %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: subb %dil, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: subb (%rsi), %dil # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sub_8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: subb $7, %al # sched: [1:0.25]
-; SKX-NEXT: subb $7, %dil # sched: [1:0.25]
-; SKX-NEXT: subb $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: subb %dl, %dil # sched: [1:0.25]
-; SKX-NEXT: subb %dil, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: subb (%rsi), %dil # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_sub_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: subb $7, %al # sched: [1:0.50]
-; BDVER2-NEXT: subb $7, %dil # sched: [1:0.50]
-; BDVER2-NEXT: subb $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: subb %dl, %dil # sched: [1:0.50]
-; BDVER2-NEXT: subb %dil, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: subb (%rsi), %dil # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_sub_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: subb $7, %al # sched: [1:0.50]
-; BTVER2-NEXT: subb $7, %dil # sched: [1:0.50]
-; BTVER2-NEXT: subb $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: subb %dl, %dil # sched: [1:0.50]
-; BTVER2-NEXT: subb %dil, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: subb (%rsi), %dil # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_sub_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: subb $7, %al # sched: [1:0.25]
-; ZNVER1-NEXT: subb $7, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: subb $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: subb %dl, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: subb %dil, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: subb (%rsi), %dil # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "subb $3, %AL \0A\09 subb $3, $0 \0A\09 subb $3, $2 \0A\09 subb $1, $0 \0A\09 subb $0, $2 \0A\09 subb $2, $0", "r,r,*m,i"(i8 %a0, i8 %a2, i8* %a1, i8 7) nounwind
- ret void
-}
-define void @test_sub_16(i16 %a0, i16* %a1, i16 %a2) optsize {
-; GENERIC-LABEL: test_sub_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: subw $511, %ax # imm = 0x1FF
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: subw $511, %di # imm = 0x1FF
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: subw $511, (%rsi) # imm = 0x1FF
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: subw $7, %di # sched: [1:0.33]
-; GENERIC-NEXT: subw $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: subw %dx, %di # sched: [1:0.33]
-; GENERIC-NEXT: subw %di, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: subw (%rsi), %di # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sub_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: subw $511, %ax # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: subw $511, %di # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: subw $511, (%rsi) # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: subw $7, %di # sched: [1:0.50]
-; ATOM-NEXT: subw $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: subw %dx, %di # sched: [1:0.50]
-; ATOM-NEXT: subw %di, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: subw (%rsi), %di # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sub_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: subw $511, %ax # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: subw $511, %di # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: subw $511, (%rsi) # imm = 0x1FF
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: subw $7, %di # sched: [1:0.50]
-; SLM-NEXT: subw $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: subw %dx, %di # sched: [1:0.50]
-; SLM-NEXT: subw %di, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: subw (%rsi), %di # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_sub_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: subw $511, %ax # imm = 0x1FF
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: subw $511, %di # imm = 0x1FF
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: subw $511, (%rsi) # imm = 0x1FF
-; SANDY-NEXT: # sched: [7:1.00]
-; SANDY-NEXT: subw $7, %di # sched: [1:0.33]
-; SANDY-NEXT: subw $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: subw %dx, %di # sched: [1:0.33]
-; SANDY-NEXT: subw %di, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: subw (%rsi), %di # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_sub_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: subw $511, %ax # imm = 0x1FF
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: subw $511, %di # imm = 0x1FF
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: subw $511, (%rsi) # imm = 0x1FF
-; HASWELL-NEXT: # sched: [7:1.00]
-; HASWELL-NEXT: subw $7, %di # sched: [1:0.25]
-; HASWELL-NEXT: subw $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: subw %dx, %di # sched: [1:0.25]
-; HASWELL-NEXT: subw %di, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: subw (%rsi), %di # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sub_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: subw $511, %ax # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: subw $511, %di # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: subw $511, (%rsi) # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [7:1.00]
-; BROADWELL-NEXT: subw $7, %di # sched: [1:0.25]
-; BROADWELL-NEXT: subw $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: subw %dx, %di # sched: [1:0.25]
-; BROADWELL-NEXT: subw %di, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: subw (%rsi), %di # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sub_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: subw $511, %ax # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: subw $511, %di # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: subw $511, (%rsi) # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [7:1.00]
-; SKYLAKE-NEXT: subw $7, %di # sched: [1:0.25]
-; SKYLAKE-NEXT: subw $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: subw %dx, %di # sched: [1:0.25]
-; SKYLAKE-NEXT: subw %di, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: subw (%rsi), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sub_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: subw $511, %ax # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: subw $511, %di # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: subw $511, (%rsi) # imm = 0x1FF
-; SKX-NEXT: # sched: [7:1.00]
-; SKX-NEXT: subw $7, %di # sched: [1:0.25]
-; SKX-NEXT: subw $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: subw %dx, %di # sched: [1:0.25]
-; SKX-NEXT: subw %di, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: subw (%rsi), %di # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_sub_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: subw $511, %ax # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: subw $511, %di # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: subw $511, (%rsi) # imm = 0x1FF
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: subw $7, %di # sched: [1:0.50]
-; BDVER2-NEXT: subw $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: subw %dx, %di # sched: [1:0.50]
-; BDVER2-NEXT: subw %di, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: subw (%rsi), %di # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_sub_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: subw $511, %ax # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: subw $511, %di # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: subw $511, (%rsi) # imm = 0x1FF
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: subw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: subw $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: subw %dx, %di # sched: [1:0.50]
-; BTVER2-NEXT: subw %di, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: subw (%rsi), %di # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_sub_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: subw $511, %ax # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: subw $511, %di # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: subw $511, (%rsi) # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: subw $7, %di # sched: [1:0.25]
-; ZNVER1-NEXT: subw $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: subw %dx, %di # sched: [1:0.25]
-; ZNVER1-NEXT: subw %di, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: subw (%rsi), %di # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "subw $3, %AX \0A\09 subw $3, $0 \0A\09 subw $3, $2 \0A\09 subw $4, $0 \0A\09 subw $4, $2 \0A\09 subw $1, $0 \0A\09 subw $0, $2 \0A\09 subw $2, $0", "r,r,*m,i,i"(i16 %a0, i16 %a2, i16* %a1, i16 511, i8 7) nounwind
- ret void
-}
-define void @test_sub_32(i32 %a0, i32* %a1, i32 %a2) optsize {
-; GENERIC-LABEL: test_sub_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: subl $665536, %eax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: subl $665536, %edi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: subl $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: subl $7, %edi # sched: [1:0.33]
-; GENERIC-NEXT: subl $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: subl %edx, %edi # sched: [1:0.33]
-; GENERIC-NEXT: subl %edi, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: subl (%rsi), %edi # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sub_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: subl $665536, %eax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: subl $665536, %edi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: subl $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: subl $7, %edi # sched: [1:0.50]
-; ATOM-NEXT: subl $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: subl %edx, %edi # sched: [1:0.50]
-; ATOM-NEXT: subl %edi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: subl (%rsi), %edi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sub_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: subl $665536, %eax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: subl $665536, %edi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: subl $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: subl $7, %edi # sched: [1:0.50]
-; SLM-NEXT: subl $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: subl %edx, %edi # sched: [1:0.50]
-; SLM-NEXT: subl %edi, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: subl (%rsi), %edi # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_sub_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: subl $665536, %eax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: subl $665536, %edi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: subl $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [7:1.00]
-; SANDY-NEXT: subl $7, %edi # sched: [1:0.33]
-; SANDY-NEXT: subl $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: subl %edx, %edi # sched: [1:0.33]
-; SANDY-NEXT: subl %edi, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: subl (%rsi), %edi # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_sub_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: subl $665536, %eax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: subl $665536, %edi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: subl $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [7:1.00]
-; HASWELL-NEXT: subl $7, %edi # sched: [1:0.25]
-; HASWELL-NEXT: subl $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: subl %edx, %edi # sched: [1:0.25]
-; HASWELL-NEXT: subl %edi, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: subl (%rsi), %edi # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sub_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: subl $665536, %eax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: subl $665536, %edi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: subl $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [7:1.00]
-; BROADWELL-NEXT: subl $7, %edi # sched: [1:0.25]
-; BROADWELL-NEXT: subl $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: subl %edx, %edi # sched: [1:0.25]
-; BROADWELL-NEXT: subl %edi, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: subl (%rsi), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sub_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: subl $665536, %eax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: subl $665536, %edi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: subl $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [7:1.00]
-; SKYLAKE-NEXT: subl $7, %edi # sched: [1:0.25]
-; SKYLAKE-NEXT: subl $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: subl %edx, %edi # sched: [1:0.25]
-; SKYLAKE-NEXT: subl %edi, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: subl (%rsi), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sub_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: subl $665536, %eax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: subl $665536, %edi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: subl $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [7:1.00]
-; SKX-NEXT: subl $7, %edi # sched: [1:0.25]
-; SKX-NEXT: subl $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: subl %edx, %edi # sched: [1:0.25]
-; SKX-NEXT: subl %edi, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: subl (%rsi), %edi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_sub_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: subl $665536, %eax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: subl $665536, %edi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: subl $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: subl $7, %edi # sched: [1:0.50]
-; BDVER2-NEXT: subl $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: subl %edx, %edi # sched: [1:0.50]
-; BDVER2-NEXT: subl %edi, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: subl (%rsi), %edi # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_sub_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: subl $665536, %eax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: subl $665536, %edi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: subl $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: subl $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: subl $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: subl %edx, %edi # sched: [1:0.50]
-; BTVER2-NEXT: subl %edi, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: subl (%rsi), %edi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_sub_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: subl $665536, %eax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: subl $665536, %edi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: subl $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: subl $7, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: subl $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: subl %edx, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: subl %edi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: subl (%rsi), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "subl $3, %EAX \0A\09 subl $3, $0 \0A\09 subl $3, $2 \0A\09 subl $4, $0 \0A\09 subl $4, $2 \0A\09 subl $1, $0 \0A\09 subl $0, $2 \0A\09 subl $2, $0", "r,r,*m,i,i"(i32 %a0, i32 %a2, i32* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-define void @test_sub_64(i64 %a0, i64* %a1, i64 %a2) optsize {
-; GENERIC-LABEL: test_sub_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: subq $665536, %rax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: subq $665536, %rdi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: subq $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: subq $7, %rdi # sched: [1:0.33]
-; GENERIC-NEXT: subq $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: subq %rdx, %rdi # sched: [1:0.33]
-; GENERIC-NEXT: subq %rdi, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: subq (%rsi), %rdi # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sub_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: subq $665536, %rax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: subq $665536, %rdi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: subq $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: subq $7, %rdi # sched: [1:0.50]
-; ATOM-NEXT: subq $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: subq %rdx, %rdi # sched: [1:0.50]
-; ATOM-NEXT: subq %rdi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: subq (%rsi), %rdi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sub_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: subq $665536, %rax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: subq $665536, %rdi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: subq $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: subq $7, %rdi # sched: [1:0.50]
-; SLM-NEXT: subq $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: subq %rdx, %rdi # sched: [1:0.50]
-; SLM-NEXT: subq %rdi, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: subq (%rsi), %rdi # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_sub_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: subq $665536, %rax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: subq $665536, %rdi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: subq $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [7:1.00]
-; SANDY-NEXT: subq $7, %rdi # sched: [1:0.33]
-; SANDY-NEXT: subq $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: subq %rdx, %rdi # sched: [1:0.33]
-; SANDY-NEXT: subq %rdi, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: subq (%rsi), %rdi # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_sub_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: subq $665536, %rax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: subq $665536, %rdi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: subq $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [7:1.00]
-; HASWELL-NEXT: subq $7, %rdi # sched: [1:0.25]
-; HASWELL-NEXT: subq $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: subq %rdx, %rdi # sched: [1:0.25]
-; HASWELL-NEXT: subq %rdi, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: subq (%rsi), %rdi # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sub_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: subq $665536, %rax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: subq $665536, %rdi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: subq $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [7:1.00]
-; BROADWELL-NEXT: subq $7, %rdi # sched: [1:0.25]
-; BROADWELL-NEXT: subq $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: subq %rdx, %rdi # sched: [1:0.25]
-; BROADWELL-NEXT: subq %rdi, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: subq (%rsi), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sub_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: subq $665536, %rax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: subq $665536, %rdi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: subq $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [7:1.00]
-; SKYLAKE-NEXT: subq $7, %rdi # sched: [1:0.25]
-; SKYLAKE-NEXT: subq $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: subq %rdx, %rdi # sched: [1:0.25]
-; SKYLAKE-NEXT: subq %rdi, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: subq (%rsi), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sub_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: subq $665536, %rax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: subq $665536, %rdi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: subq $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [7:1.00]
-; SKX-NEXT: subq $7, %rdi # sched: [1:0.25]
-; SKX-NEXT: subq $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: subq %rdx, %rdi # sched: [1:0.25]
-; SKX-NEXT: subq %rdi, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: subq (%rsi), %rdi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_sub_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: subq $665536, %rax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: subq $665536, %rdi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: subq $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: subq $7, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: subq $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: subq %rdx, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: subq %rdi, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: subq (%rsi), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_sub_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: subq $665536, %rax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: subq $665536, %rdi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: subq $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: subq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: subq $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: subq %rdx, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: subq %rdi, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: subq (%rsi), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_sub_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: subq $665536, %rax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: subq $665536, %rdi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: subq $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: subq $7, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: subq $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: subq %rdx, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: subq %rdi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: subq (%rsi), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "subq $3, %RAX \0A\09 subq $3, $0 \0A\09 subq $3, $2 \0A\09 subq $4, $0 \0A\09 subq $4, $2 \0A\09 subq $1, $0 \0A\09 subq $0, $2 \0A\09 subq $2, $0", "r,r,*m,i,i"(i64 %a0, i64 %a2, i64* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-
-; TODO - test_swapgs
-; TODO - test_syscall
-; TODO - test_sysenter
-; TODO - test_sysexit
-; TODO - test_sysret
-
-define void @test_test_8(i8 %a0, i8* %a1) optsize {
-; GENERIC-LABEL: test_test_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: testb $7, %al # sched: [1:0.33]
-; GENERIC-NEXT: testb $7, %dil # sched: [1:0.33]
-; GENERIC-NEXT: testb $7, (%rsi) # sched: [6:0.50]
-; GENERIC-NEXT: testb %dil, %dil # sched: [1:0.33]
-; GENERIC-NEXT: testb %dil, (%rsi) # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_test_8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: testb $7, %al # sched: [1:0.50]
-; ATOM-NEXT: testb $7, %dil # sched: [1:0.50]
-; ATOM-NEXT: testb $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: testb %dil, %dil # sched: [1:0.50]
-; ATOM-NEXT: testb %dil, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_test_8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: testb $7, %al # sched: [1:0.50]
-; SLM-NEXT: testb $7, %dil # sched: [1:0.50]
-; SLM-NEXT: testb $7, (%rsi) # sched: [4:1.00]
-; SLM-NEXT: testb %dil, %dil # sched: [1:0.50]
-; SLM-NEXT: testb %dil, (%rsi) # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_test_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: testb $7, %al # sched: [1:0.33]
-; SANDY-NEXT: testb $7, %dil # sched: [1:0.33]
-; SANDY-NEXT: testb $7, (%rsi) # sched: [6:0.50]
-; SANDY-NEXT: testb %dil, %dil # sched: [1:0.33]
-; SANDY-NEXT: testb %dil, (%rsi) # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_test_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: testb $7, %al # sched: [1:0.25]
-; HASWELL-NEXT: testb $7, %dil # sched: [1:0.25]
-; HASWELL-NEXT: testb $7, (%rsi) # sched: [6:0.50]
-; HASWELL-NEXT: testb %dil, %dil # sched: [1:0.25]
-; HASWELL-NEXT: testb %dil, (%rsi) # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_test_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: testb $7, %al # sched: [1:0.25]
-; BROADWELL-NEXT: testb $7, %dil # sched: [1:0.25]
-; BROADWELL-NEXT: testb $7, (%rsi) # sched: [6:0.50]
-; BROADWELL-NEXT: testb %dil, %dil # sched: [1:0.25]
-; BROADWELL-NEXT: testb %dil, (%rsi) # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_test_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: testb $7, %al # sched: [1:0.25]
-; SKYLAKE-NEXT: testb $7, %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: testb $7, (%rsi) # sched: [6:0.50]
-; SKYLAKE-NEXT: testb %dil, %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: testb %dil, (%rsi) # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_test_8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: testb $7, %al # sched: [1:0.25]
-; SKX-NEXT: testb $7, %dil # sched: [1:0.25]
-; SKX-NEXT: testb $7, (%rsi) # sched: [6:0.50]
-; SKX-NEXT: testb %dil, %dil # sched: [1:0.25]
-; SKX-NEXT: testb %dil, (%rsi) # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_test_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: testb $7, %al # sched: [1:0.50]
-; BDVER2-NEXT: testb $7, %dil # sched: [1:0.50]
-; BDVER2-NEXT: testb $7, (%rsi) # sched: [5:0.50]
-; BDVER2-NEXT: testb %dil, %dil # sched: [1:0.50]
-; BDVER2-NEXT: testb %dil, (%rsi) # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_test_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: testb $7, %al # sched: [1:0.50]
-; BTVER2-NEXT: testb $7, %dil # sched: [1:0.50]
-; BTVER2-NEXT: testb $7, (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: testb %dil, %dil # sched: [1:0.50]
-; BTVER2-NEXT: testb %dil, (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_test_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: testb $7, %al # sched: [1:0.25]
-; ZNVER1-NEXT: testb $7, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: testb $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: testb %dil, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: testb %dil, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "testb $2, %AL \0A\09 testb $2, $0 \0A\09 testb $2, $1 \0A\09 testb $0, $0 \0A\09 testb $0, $1", "r,*m,i"(i8 %a0, i8* %a1, i8 7) nounwind
- ret void
-}
-define void @test_test_16(i16 %a0, i16* %a1) optsize {
-; GENERIC-LABEL: test_test_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: testw $511, %ax # imm = 0x1FF
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: testw $511, %di # imm = 0x1FF
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: testw $511, (%rsi) # imm = 0x1FF
-; GENERIC-NEXT: # sched: [6:0.50]
-; GENERIC-NEXT: testw %di, %di # sched: [1:0.33]
-; GENERIC-NEXT: testw %di, (%rsi) # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_test_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: testw $511, %ax # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: testw $511, %di # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: testw $511, (%rsi) # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: testw %di, %di # sched: [1:0.50]
-; ATOM-NEXT: testw %di, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_test_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: testw $511, %ax # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: testw $511, %di # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: testw $511, (%rsi) # imm = 0x1FF
-; SLM-NEXT: # sched: [4:1.00]
-; SLM-NEXT: testw %di, %di # sched: [1:0.50]
-; SLM-NEXT: testw %di, (%rsi) # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_test_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: testw $511, %ax # imm = 0x1FF
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: testw $511, %di # imm = 0x1FF
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: testw $511, (%rsi) # imm = 0x1FF
-; SANDY-NEXT: # sched: [6:0.50]
-; SANDY-NEXT: testw %di, %di # sched: [1:0.33]
-; SANDY-NEXT: testw %di, (%rsi) # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_test_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: testw $511, %ax # imm = 0x1FF
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: testw $511, %di # imm = 0x1FF
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: testw $511, (%rsi) # imm = 0x1FF
-; HASWELL-NEXT: # sched: [6:0.50]
-; HASWELL-NEXT: testw %di, %di # sched: [1:0.25]
-; HASWELL-NEXT: testw %di, (%rsi) # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_test_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: testw $511, %ax # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: testw $511, %di # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: testw $511, (%rsi) # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [6:0.50]
-; BROADWELL-NEXT: testw %di, %di # sched: [1:0.25]
-; BROADWELL-NEXT: testw %di, (%rsi) # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_test_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: testw $511, %ax # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: testw $511, %di # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: testw $511, (%rsi) # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [6:0.50]
-; SKYLAKE-NEXT: testw %di, %di # sched: [1:0.25]
-; SKYLAKE-NEXT: testw %di, (%rsi) # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_test_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: testw $511, %ax # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: testw $511, %di # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: testw $511, (%rsi) # imm = 0x1FF
-; SKX-NEXT: # sched: [6:0.50]
-; SKX-NEXT: testw %di, %di # sched: [1:0.25]
-; SKX-NEXT: testw %di, (%rsi) # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_test_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: testw $511, %ax # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: testw $511, %di # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: testw $511, (%rsi) # imm = 0x1FF
-; BDVER2-NEXT: # sched: [5:0.50]
-; BDVER2-NEXT: testw %di, %di # sched: [1:0.50]
-; BDVER2-NEXT: testw %di, (%rsi) # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_test_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: testw $511, %ax # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: testw $511, %di # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: testw $511, (%rsi) # imm = 0x1FF
-; BTVER2-NEXT: # sched: [4:1.00]
-; BTVER2-NEXT: testw %di, %di # sched: [1:0.50]
-; BTVER2-NEXT: testw %di, (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_test_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: testw $511, %ax # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: testw $511, %di # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: testw $511, (%rsi) # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: testw %di, %di # sched: [1:0.25]
-; ZNVER1-NEXT: testw %di, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "testw $2, %AX \0A\09 testw $2, $0 \0A\09 testw $2, $1 \0A\09 testw $0, $0 \0A\09 testw $0, $1", "r,*m,i"(i16 %a0, i16* %a1, i16 511) nounwind
- ret void
-}
-define void @test_test_32(i32 %a0, i32* %a1) optsize {
-; GENERIC-LABEL: test_test_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: testl $665536, %eax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: testl $665536, %edi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: testl $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [6:0.50]
-; GENERIC-NEXT: testl %edi, %edi # sched: [1:0.33]
-; GENERIC-NEXT: testl %edi, (%rsi) # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_test_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: testl $665536, %eax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: testl $665536, %edi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: testl $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: testl %edi, %edi # sched: [1:0.50]
-; ATOM-NEXT: testl %edi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_test_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: testl $665536, %eax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: testl $665536, %edi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: testl $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [4:1.00]
-; SLM-NEXT: testl %edi, %edi # sched: [1:0.50]
-; SLM-NEXT: testl %edi, (%rsi) # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_test_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: testl $665536, %eax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: testl $665536, %edi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: testl $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [6:0.50]
-; SANDY-NEXT: testl %edi, %edi # sched: [1:0.33]
-; SANDY-NEXT: testl %edi, (%rsi) # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_test_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: testl $665536, %eax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: testl $665536, %edi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: testl $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [6:0.50]
-; HASWELL-NEXT: testl %edi, %edi # sched: [1:0.25]
-; HASWELL-NEXT: testl %edi, (%rsi) # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_test_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: testl $665536, %eax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: testl $665536, %edi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: testl $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [6:0.50]
-; BROADWELL-NEXT: testl %edi, %edi # sched: [1:0.25]
-; BROADWELL-NEXT: testl %edi, (%rsi) # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_test_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: testl $665536, %eax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: testl $665536, %edi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: testl $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [6:0.50]
-; SKYLAKE-NEXT: testl %edi, %edi # sched: [1:0.25]
-; SKYLAKE-NEXT: testl %edi, (%rsi) # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_test_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: testl $665536, %eax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: testl $665536, %edi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: testl $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [6:0.50]
-; SKX-NEXT: testl %edi, %edi # sched: [1:0.25]
-; SKX-NEXT: testl %edi, (%rsi) # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_test_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: testl $665536, %eax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: testl $665536, %edi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: testl $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [5:0.50]
-; BDVER2-NEXT: testl %edi, %edi # sched: [1:0.50]
-; BDVER2-NEXT: testl %edi, (%rsi) # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_test_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: testl $665536, %eax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: testl $665536, %edi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: testl $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [4:1.00]
-; BTVER2-NEXT: testl %edi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: testl %edi, (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_test_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: testl $665536, %eax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: testl $665536, %edi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: testl $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: testl %edi, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: testl %edi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "testl $2, %EAX \0A\09 testl $2, $0 \0A\09 testl $2, $1 \0A\09 testl $0, $0 \0A\09 testl $0, $1", "r,*m,i"(i32 %a0, i32* %a1, i32 665536) nounwind
- ret void
-}
-define void @test_test_64(i64 %a0, i64* %a1) optsize {
-; GENERIC-LABEL: test_test_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: testq $665536, %rax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: testq $665536, %rdi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: testq $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [6:0.50]
-; GENERIC-NEXT: testq %rdi, %rdi # sched: [1:0.33]
-; GENERIC-NEXT: testq %rdi, (%rsi) # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_test_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: testq $665536, %rax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: testq $665536, %rdi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: testq $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: testq %rdi, %rdi # sched: [1:0.50]
-; ATOM-NEXT: testq %rdi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_test_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: testq $665536, %rax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: testq $665536, %rdi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: testq $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [4:1.00]
-; SLM-NEXT: testq %rdi, %rdi # sched: [1:0.50]
-; SLM-NEXT: testq %rdi, (%rsi) # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_test_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: testq $665536, %rax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: testq $665536, %rdi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: testq $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [6:0.50]
-; SANDY-NEXT: testq %rdi, %rdi # sched: [1:0.33]
-; SANDY-NEXT: testq %rdi, (%rsi) # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_test_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: testq $665536, %rax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: testq $665536, %rdi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: testq $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [6:0.50]
-; HASWELL-NEXT: testq %rdi, %rdi # sched: [1:0.25]
-; HASWELL-NEXT: testq %rdi, (%rsi) # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_test_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: testq $665536, %rax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: testq $665536, %rdi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: testq $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [6:0.50]
-; BROADWELL-NEXT: testq %rdi, %rdi # sched: [1:0.25]
-; BROADWELL-NEXT: testq %rdi, (%rsi) # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_test_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: testq $665536, %rax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: testq $665536, %rdi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: testq $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [6:0.50]
-; SKYLAKE-NEXT: testq %rdi, %rdi # sched: [1:0.25]
-; SKYLAKE-NEXT: testq %rdi, (%rsi) # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_test_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: testq $665536, %rax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: testq $665536, %rdi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: testq $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [6:0.50]
-; SKX-NEXT: testq %rdi, %rdi # sched: [1:0.25]
-; SKX-NEXT: testq %rdi, (%rsi) # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_test_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: testq $665536, %rax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: testq $665536, %rdi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: testq $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [5:0.50]
-; BDVER2-NEXT: testq %rdi, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: testq %rdi, (%rsi) # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_test_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: testq $665536, %rax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: testq $665536, %rdi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: testq $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [4:1.00]
-; BTVER2-NEXT: testq %rdi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: testq %rdi, (%rsi) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_test_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: testq $665536, %rax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: testq $665536, %rdi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: testq $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: testq %rdi, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: testq %rdi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "testq $2, %RAX \0A\09 testq $2, $0 \0A\09 testq $2, $1 \0A\09 testq $0, $0 \0A\09 testq $0, $1", "r,*m,i"(i64 %a0, i64* %a1, i32 665536) nounwind
- ret void
-}
-
-; TODO: ud0, ud1
-define void @test_ud2() optsize {
-; GENERIC-LABEL: test_ud2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: ud2 # sched: [100:0.33]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_ud2:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: ud2 # sched: [100:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_ud2:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: ud2 # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_ud2:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: ud2 # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_ud2:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: ud2 # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ud2:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: ud2 # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ud2:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: ud2 # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_ud2:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: ud2 # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_ud2:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: ud2 # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_ud2:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: ud2 # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ud2:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: ud2 # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "ud2", ""()
- ret void
-}
-
-; TODO - test_verr
-; TODO - test_verw
-; TODO - test_vmload
-; TODO - test_vmmcall
-; TODO - test_vmrun
-; TODO - test_vmsave
-; TODO - test_wbinvd
-
-define void @test_xadd_8(i8 %a0, i8 %a1, i8 *%a2) optsize {
-; GENERIC-LABEL: test_xadd_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: xaddb %dil, %sil # sched: [2:1.00]
-; GENERIC-NEXT: xaddb %dil, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_xadd_8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: xaddb %dil, %sil # sched: [2:1.00]
-; ATOM-NEXT: xaddb %dil, (%rdx) # sched: [3:1.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_xadd_8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: xaddb %dil, %sil # sched: [1:0.50]
-; SLM-NEXT: xaddb %dil, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_xadd_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: xaddb %dil, %sil # sched: [2:1.00]
-; SANDY-NEXT: xaddb %dil, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_xadd_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: xaddb %dil, %sil # sched: [2:0.75]
-; HASWELL-NEXT: xaddb %dil, (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xadd_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: xaddb %dil, %sil # sched: [2:0.75]
-; BROADWELL-NEXT: xaddb %dil, (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xadd_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: xaddb %dil, %sil # sched: [2:0.75]
-; SKYLAKE-NEXT: xaddb %dil, (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xadd_8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: xaddb %dil, %sil # sched: [2:0.75]
-; SKX-NEXT: xaddb %dil, (%rdx) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_xadd_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: xaddb %dil, %sil # sched: [2:1.00]
-; BDVER2-NEXT: xaddb %dil, (%rdx) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xadd_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: xaddb %dil, %sil # sched: [1:0.50]
-; BTVER2-NEXT: xaddb %dil, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xadd_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: xaddb %dil, %sil # sched: [1:0.25]
-; ZNVER1-NEXT: xaddb %dil, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "xaddb $0, $1 \0A\09 xaddb $0, $2", "r,r,*m"(i8 %a0, i8 %a1, i8 *%a2) nounwind
- ret void
-}
-define void @test_xadd_16(i16 %a0, i16 %a1, i16 *%a2) optsize {
-; GENERIC-LABEL: test_xadd_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: xaddw %di, %si # sched: [2:1.00]
-; GENERIC-NEXT: xaddw %di, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_xadd_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: xaddw %di, %si # sched: [2:1.00]
-; ATOM-NEXT: xaddw %di, (%rdx) # sched: [3:1.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_xadd_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: xaddw %di, %si # sched: [1:0.50]
-; SLM-NEXT: xaddw %di, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_xadd_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: xaddw %di, %si # sched: [2:1.00]
-; SANDY-NEXT: xaddw %di, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_xadd_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: xaddw %di, %si # sched: [2:0.75]
-; HASWELL-NEXT: xaddw %di, (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xadd_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: xaddw %di, %si # sched: [2:0.75]
-; BROADWELL-NEXT: xaddw %di, (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xadd_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: xaddw %di, %si # sched: [2:0.75]
-; SKYLAKE-NEXT: xaddw %di, (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xadd_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: xaddw %di, %si # sched: [2:0.75]
-; SKX-NEXT: xaddw %di, (%rdx) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_xadd_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: xaddw %di, %si # sched: [2:1.00]
-; BDVER2-NEXT: xaddw %di, (%rdx) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xadd_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: xaddw %di, %si # sched: [1:0.50]
-; BTVER2-NEXT: xaddw %di, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xadd_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: xaddw %di, %si # sched: [1:0.25]
-; ZNVER1-NEXT: xaddw %di, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "xaddw $0, $1 \0A\09 xaddw $0, $2", "r,r,*m"(i16 %a0, i16 %a1, i16 *%a2) nounwind
- ret void
-}
-define void @test_xadd_32(i32 %a0, i32 %a1, i32 *%a2) optsize {
-; GENERIC-LABEL: test_xadd_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: xaddl %edi, %esi # sched: [2:1.00]
-; GENERIC-NEXT: xaddl %edi, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_xadd_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: xaddl %edi, %esi # sched: [2:1.00]
-; ATOM-NEXT: xaddl %edi, (%rdx) # sched: [3:1.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_xadd_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: xaddl %edi, %esi # sched: [1:0.50]
-; SLM-NEXT: xaddl %edi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_xadd_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: xaddl %edi, %esi # sched: [2:1.00]
-; SANDY-NEXT: xaddl %edi, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_xadd_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: xaddl %edi, %esi # sched: [2:0.75]
-; HASWELL-NEXT: xaddl %edi, (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xadd_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: xaddl %edi, %esi # sched: [2:0.75]
-; BROADWELL-NEXT: xaddl %edi, (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xadd_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: xaddl %edi, %esi # sched: [2:0.75]
-; SKYLAKE-NEXT: xaddl %edi, (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xadd_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: xaddl %edi, %esi # sched: [2:0.75]
-; SKX-NEXT: xaddl %edi, (%rdx) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_xadd_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: xaddl %edi, %esi # sched: [2:1.00]
-; BDVER2-NEXT: xaddl %edi, (%rdx) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xadd_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: xaddl %edi, %esi # sched: [1:0.50]
-; BTVER2-NEXT: xaddl %edi, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xadd_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: xaddl %edi, %esi # sched: [1:0.25]
-; ZNVER1-NEXT: xaddl %edi, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "xaddl $0, $1 \0A\09 xaddl $0, $2", "r,r,*m"(i32 %a0, i32 %a1, i32 *%a2) nounwind
- ret void
-}
-define void @test_xadd_64(i64 %a0, i64 %a1, i64 *%a2) optsize {
-; GENERIC-LABEL: test_xadd_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: xaddq %rdi, %rsi # sched: [2:1.00]
-; GENERIC-NEXT: xaddq %rdi, (%rdx) # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_xadd_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: xaddq %rdi, %rsi # sched: [2:1.00]
-; ATOM-NEXT: xaddq %rdi, (%rdx) # sched: [3:1.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_xadd_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: xaddq %rdi, %rsi # sched: [1:0.50]
-; SLM-NEXT: xaddq %rdi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_xadd_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: xaddq %rdi, %rsi # sched: [2:1.00]
-; SANDY-NEXT: xaddq %rdi, (%rdx) # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_xadd_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: xaddq %rdi, %rsi # sched: [2:0.75]
-; HASWELL-NEXT: xaddq %rdi, (%rdx) # sched: [8:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xadd_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: xaddq %rdi, %rsi # sched: [2:0.75]
-; BROADWELL-NEXT: xaddq %rdi, (%rdx) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xadd_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: xaddq %rdi, %rsi # sched: [2:0.75]
-; SKYLAKE-NEXT: xaddq %rdi, (%rdx) # sched: [7:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xadd_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: xaddq %rdi, %rsi # sched: [2:0.75]
-; SKX-NEXT: xaddq %rdi, (%rdx) # sched: [7:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_xadd_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: xaddq %rdi, %rsi # sched: [2:1.00]
-; BDVER2-NEXT: xaddq %rdi, (%rdx) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xadd_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: xaddq %rdi, %rsi # sched: [1:0.50]
-; BTVER2-NEXT: xaddq %rdi, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xadd_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: xaddq %rdi, %rsi # sched: [1:0.25]
-; ZNVER1-NEXT: xaddq %rdi, (%rdx) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "xaddq $0, $1 \0A\09 xaddq $0, $2", "r,r,*m"(i64 %a0, i64 %a1, i64 *%a2) nounwind
- ret void
-}
-
-define void @test_xchg_8(i8 %a0, i8 %a1, i8 *%a2) optsize {
-; GENERIC-LABEL: test_xchg_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: xchgb %sil, %dil # sched: [2:1.00]
-; GENERIC-NEXT: xchgb %dil, (%rdx) # sched: [6:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_xchg_8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: xchgb %sil, %dil # sched: [2:1.00]
-; ATOM-NEXT: xchgb %dil, (%rdx) # sched: [3:1.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_xchg_8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: xchgb %sil, %dil # sched: [1:0.50]
-; SLM-NEXT: xchgb %dil, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_xchg_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: xchgb %sil, %dil # sched: [2:1.00]
-; SANDY-NEXT: xchgb %dil, (%rdx) # sched: [6:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_xchg_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: xchgb %sil, %dil # sched: [2:0.75]
-; HASWELL-NEXT: xchgb %dil, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xchg_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: xchgb %sil, %dil # sched: [2:0.75]
-; BROADWELL-NEXT: xchgb %dil, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xchg_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: xchgb %sil, %dil # sched: [2:0.75]
-; SKYLAKE-NEXT: xchgb %dil, (%rdx) # sched: [10:1.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xchg_8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: xchgb %sil, %dil # sched: [2:0.75]
-; SKX-NEXT: xchgb %dil, (%rdx) # sched: [10:1.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_xchg_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: xchgb %sil, %dil # sched: [1:1.00]
-; BDVER2-NEXT: xchgb %dil, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xchg_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: xchgb %sil, %dil # sched: [1:0.50]
-; BTVER2-NEXT: xchgb %dil, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xchg_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: xchgb %sil, %dil # sched: [1:0.50]
-; ZNVER1-NEXT: xchgb %dil, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "xchg $1, $0 \0A\09 xchg $2, $0", "r,r,*m"(i8 %a0, i8 %a1, i8 *%a2) nounwind
- ret void
-}
-define void @test_xchg_16(i16 %a0, i16 %a1, i16 *%a2) optsize {
-; GENERIC-LABEL: test_xchg_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: xchgw %di, %ax # sched: [2:1.00]
-; GENERIC-NEXT: xchgw %si, %di # sched: [2:1.00]
-; GENERIC-NEXT: xchgw %di, (%rdx) # sched: [6:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_xchg_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: xchgw %di, %ax # sched: [2:1.00]
-; ATOM-NEXT: xchgw %si, %di # sched: [2:1.00]
-; ATOM-NEXT: xchgw %di, (%rdx) # sched: [3:1.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_xchg_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: xchgw %di, %ax # sched: [1:0.50]
-; SLM-NEXT: xchgw %si, %di # sched: [1:0.50]
-; SLM-NEXT: xchgw %di, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_xchg_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: xchgw %di, %ax # sched: [2:1.00]
-; SANDY-NEXT: xchgw %si, %di # sched: [2:1.00]
-; SANDY-NEXT: xchgw %di, (%rdx) # sched: [6:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_xchg_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: xchgw %di, %ax # sched: [2:0.75]
-; HASWELL-NEXT: xchgw %si, %di # sched: [2:0.75]
-; HASWELL-NEXT: xchgw %di, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xchg_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: xchgw %di, %ax # sched: [2:0.75]
-; BROADWELL-NEXT: xchgw %si, %di # sched: [2:0.75]
-; BROADWELL-NEXT: xchgw %di, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xchg_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: xchgw %di, %ax # sched: [2:0.75]
-; SKYLAKE-NEXT: xchgw %si, %di # sched: [2:0.75]
-; SKYLAKE-NEXT: xchgw %di, (%rdx) # sched: [10:1.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xchg_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: xchgw %di, %ax # sched: [2:0.75]
-; SKX-NEXT: xchgw %si, %di # sched: [2:0.75]
-; SKX-NEXT: xchgw %di, (%rdx) # sched: [10:1.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_xchg_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: xchgw %di, %ax # sched: [1:1.00]
-; BDVER2-NEXT: xchgw %si, %di # sched: [2:1.00]
-; BDVER2-NEXT: xchgw %di, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xchg_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: xchgw %di, %ax # sched: [1:0.50]
-; BTVER2-NEXT: xchgw %si, %di # sched: [1:0.50]
-; BTVER2-NEXT: xchgw %di, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xchg_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: xchgw %di, %ax # sched: [1:0.50]
-; ZNVER1-NEXT: xchgw %si, %di # sched: [1:0.50]
-; ZNVER1-NEXT: xchgw %di, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "xchg %AX, $0 \0A\09 xchg $1, $0 \0A\09 xchg $2, $0", "r,r,*m"(i16 %a0, i16 %a1, i16 *%a2) nounwind
- ret void
-}
-define void @test_xchg_32(i32 %a0, i32 %a1, i32 *%a2) optsize {
-; GENERIC-LABEL: test_xchg_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: xchgl %edi, %eax # sched: [2:1.00]
-; GENERIC-NEXT: xchgl %esi, %edi # sched: [2:1.00]
-; GENERIC-NEXT: xchgl %edi, (%rdx) # sched: [6:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_xchg_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: xchgl %edi, %eax # sched: [2:1.00]
-; ATOM-NEXT: xchgl %esi, %edi # sched: [2:1.00]
-; ATOM-NEXT: xchgl %edi, (%rdx) # sched: [3:1.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_xchg_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: xchgl %edi, %eax # sched: [1:0.50]
-; SLM-NEXT: xchgl %esi, %edi # sched: [1:0.50]
-; SLM-NEXT: xchgl %edi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_xchg_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: xchgl %edi, %eax # sched: [2:1.00]
-; SANDY-NEXT: xchgl %esi, %edi # sched: [2:1.00]
-; SANDY-NEXT: xchgl %edi, (%rdx) # sched: [6:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_xchg_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: xchgl %edi, %eax # sched: [2:0.75]
-; HASWELL-NEXT: xchgl %esi, %edi # sched: [2:0.75]
-; HASWELL-NEXT: xchgl %edi, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xchg_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: xchgl %edi, %eax # sched: [2:0.75]
-; BROADWELL-NEXT: xchgl %esi, %edi # sched: [2:0.75]
-; BROADWELL-NEXT: xchgl %edi, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xchg_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: xchgl %edi, %eax # sched: [2:0.75]
-; SKYLAKE-NEXT: xchgl %esi, %edi # sched: [2:0.75]
-; SKYLAKE-NEXT: xchgl %edi, (%rdx) # sched: [10:1.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xchg_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: xchgl %edi, %eax # sched: [2:0.75]
-; SKX-NEXT: xchgl %esi, %edi # sched: [2:0.75]
-; SKX-NEXT: xchgl %edi, (%rdx) # sched: [10:1.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_xchg_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: xchgl %edi, %eax # sched: [1:1.00]
-; BDVER2-NEXT: xchgl %esi, %edi # sched: [1:1.00]
-; BDVER2-NEXT: xchgl %edi, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xchg_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: xchgl %edi, %eax # sched: [1:0.50]
-; BTVER2-NEXT: xchgl %esi, %edi # sched: [1:0.50]
-; BTVER2-NEXT: xchgl %edi, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xchg_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: xchgl %edi, %eax # sched: [1:0.50]
-; ZNVER1-NEXT: xchgl %esi, %edi # sched: [1:0.50]
-; ZNVER1-NEXT: xchgl %edi, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "xchg %EAX, $0 \0A\09 xchg $1, $0 \0A\09 xchg $2, $0", "r,r,*m"(i32 %a0, i32 %a1, i32 *%a2) nounwind
- ret void
-}
-define void @test_xchg_64(i64 %a0, i64 %a1, i64 *%a2) optsize {
-; GENERIC-LABEL: test_xchg_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: xchgq %rdi, %rax # sched: [2:1.00]
-; GENERIC-NEXT: xchgq %rsi, %rdi # sched: [2:1.00]
-; GENERIC-NEXT: xchgq %rdi, (%rdx) # sched: [6:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_xchg_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: xchgq %rdi, %rax # sched: [2:1.00]
-; ATOM-NEXT: xchgq %rsi, %rdi # sched: [2:1.00]
-; ATOM-NEXT: xchgq %rdi, (%rdx) # sched: [3:1.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_xchg_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: xchgq %rdi, %rax # sched: [1:0.50]
-; SLM-NEXT: xchgq %rsi, %rdi # sched: [1:0.50]
-; SLM-NEXT: xchgq %rdi, (%rdx) # sched: [4:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_xchg_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: xchgq %rdi, %rax # sched: [2:1.00]
-; SANDY-NEXT: xchgq %rsi, %rdi # sched: [2:1.00]
-; SANDY-NEXT: xchgq %rdi, (%rdx) # sched: [6:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_xchg_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: xchgq %rdi, %rax # sched: [2:0.75]
-; HASWELL-NEXT: xchgq %rsi, %rdi # sched: [2:0.75]
-; HASWELL-NEXT: xchgq %rdi, (%rdx) # sched: [9:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xchg_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: xchgq %rdi, %rax # sched: [2:0.75]
-; BROADWELL-NEXT: xchgq %rsi, %rdi # sched: [2:0.75]
-; BROADWELL-NEXT: xchgq %rdi, (%rdx) # sched: [8:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xchg_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: xchgq %rdi, %rax # sched: [2:0.75]
-; SKYLAKE-NEXT: xchgq %rsi, %rdi # sched: [2:0.75]
-; SKYLAKE-NEXT: xchgq %rdi, (%rdx) # sched: [10:1.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xchg_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: xchgq %rdi, %rax # sched: [2:0.75]
-; SKX-NEXT: xchgq %rsi, %rdi # sched: [2:0.75]
-; SKX-NEXT: xchgq %rdi, (%rdx) # sched: [10:1.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_xchg_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: xchgq %rdi, %rax # sched: [1:1.00]
-; BDVER2-NEXT: xchgq %rsi, %rdi # sched: [1:1.00]
-; BDVER2-NEXT: xchgq %rdi, (%rdx) # sched: [5:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xchg_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: xchgq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: xchgq %rsi, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: xchgq %rdi, (%rdx) # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xchg_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: xchgq %rdi, %rax # sched: [1:0.50]
-; ZNVER1-NEXT: xchgq %rsi, %rdi # sched: [1:0.50]
-; ZNVER1-NEXT: xchgq %rdi, (%rdx) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "xchg %RAX, $0 \0A\09 xchg $1, $0 \0A\09 xchg $2, $0", "r,r,*m"(i64 %a0, i64 %a1, i64 *%a2) nounwind
- ret void
-}
-
-define void @test_xlat() optsize {
-; GENERIC-LABEL: test_xlat:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: xlatb # sched: [5:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_xlat:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: xlatb # sched: [6:3.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_xlat:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: xlatb # sched: [3:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_xlat:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: xlatb # sched: [5:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_xlat:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: xlatb # sched: [7:0.75]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xlat:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: xlatb # sched: [5:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xlat:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: xlatb # sched: [5:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xlat:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: xlatb # sched: [5:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_xlat:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: xlatb # sched: [6:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xlat:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: xlatb # sched: [3:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xlat:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: xlatb # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "xlat", ""() nounwind
- ret void
-}
-
-define void @test_xor_8(i8 %a0, i8* %a1, i8 %a2) optsize {
-; GENERIC-LABEL: test_xor_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: xorb $7, %al # sched: [1:0.33]
-; GENERIC-NEXT: xorb $7, %dil # sched: [1:0.33]
-; GENERIC-NEXT: xorb $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: xorb %dl, %dil # sched: [1:0.33]
-; GENERIC-NEXT: xorb %dil, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: xorb (%rsi), %dil # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_xor_8:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: xorb $7, %al # sched: [1:0.50]
-; ATOM-NEXT: xorb $7, %dil # sched: [1:0.50]
-; ATOM-NEXT: xorb $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: xorb %dl, %dil # sched: [1:0.50]
-; ATOM-NEXT: xorb %dil, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: xorb (%rsi), %dil # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_xor_8:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: xorb $7, %al # sched: [1:0.50]
-; SLM-NEXT: xorb $7, %dil # sched: [1:0.50]
-; SLM-NEXT: xorb $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: xorb %dl, %dil # sched: [1:0.50]
-; SLM-NEXT: xorb %dil, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: xorb (%rsi), %dil # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_xor_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: xorb $7, %al # sched: [1:0.33]
-; SANDY-NEXT: xorb $7, %dil # sched: [1:0.33]
-; SANDY-NEXT: xorb $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: xorb %dl, %dil # sched: [1:0.33]
-; SANDY-NEXT: xorb %dil, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: xorb (%rsi), %dil # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_xor_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: xorb $7, %al # sched: [1:0.25]
-; HASWELL-NEXT: xorb $7, %dil # sched: [1:0.25]
-; HASWELL-NEXT: xorb $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: xorb %dl, %dil # sched: [1:0.25]
-; HASWELL-NEXT: xorb %dil, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: xorb (%rsi), %dil # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xor_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: xorb $7, %al # sched: [1:0.25]
-; BROADWELL-NEXT: xorb $7, %dil # sched: [1:0.25]
-; BROADWELL-NEXT: xorb $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: xorb %dl, %dil # sched: [1:0.25]
-; BROADWELL-NEXT: xorb %dil, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: xorb (%rsi), %dil # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xor_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: xorb $7, %al # sched: [1:0.25]
-; SKYLAKE-NEXT: xorb $7, %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: xorb $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: xorb %dl, %dil # sched: [1:0.25]
-; SKYLAKE-NEXT: xorb %dil, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: xorb (%rsi), %dil # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xor_8:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: xorb $7, %al # sched: [1:0.25]
-; SKX-NEXT: xorb $7, %dil # sched: [1:0.25]
-; SKX-NEXT: xorb $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: xorb %dl, %dil # sched: [1:0.25]
-; SKX-NEXT: xorb %dil, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: xorb (%rsi), %dil # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_xor_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: xorb $7, %al # sched: [1:0.50]
-; BDVER2-NEXT: xorb $7, %dil # sched: [1:0.50]
-; BDVER2-NEXT: xorb $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: xorb %dl, %dil # sched: [1:0.50]
-; BDVER2-NEXT: xorb %dil, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: xorb (%rsi), %dil # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xor_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: xorb $7, %al # sched: [1:0.50]
-; BTVER2-NEXT: xorb $7, %dil # sched: [1:0.50]
-; BTVER2-NEXT: xorb $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: xorb %dl, %dil # sched: [1:0.50]
-; BTVER2-NEXT: xorb %dil, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: xorb (%rsi), %dil # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xor_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: xorb $7, %al # sched: [1:0.25]
-; ZNVER1-NEXT: xorb $7, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: xorb $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: xorb %dl, %dil # sched: [1:0.25]
-; ZNVER1-NEXT: xorb %dil, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: xorb (%rsi), %dil # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "xorb $3, %AL \0A\09 xorb $3, $0 \0A\09 xorb $3, $2 \0A\09 xorb $1, $0 \0A\09 xorb $0, $2 \0A\09 xorb $2, $0", "r,r,*m,i"(i8 %a0, i8 %a2, i8* %a1, i8 7) nounwind
- ret void
-}
-define void @test_xor_16(i16 %a0, i16* %a1, i16 %a2) optsize {
-; GENERIC-LABEL: test_xor_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: xorw $511, %ax # imm = 0x1FF
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: xorw $511, %di # imm = 0x1FF
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: xorw $511, (%rsi) # imm = 0x1FF
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: xorw $7, %di # sched: [1:0.33]
-; GENERIC-NEXT: xorw $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: xorw %dx, %di # sched: [1:0.33]
-; GENERIC-NEXT: xorw %di, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: xorw (%rsi), %di # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_xor_16:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: xorw $511, %ax # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: xorw $511, %di # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: xorw $511, (%rsi) # imm = 0x1FF
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: xorw $7, %di # sched: [1:0.50]
-; ATOM-NEXT: xorw $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: xorw %dx, %di # sched: [1:0.50]
-; ATOM-NEXT: xorw %di, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: xorw (%rsi), %di # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_xor_16:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: xorw $511, %ax # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: xorw $511, %di # imm = 0x1FF
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: xorw $511, (%rsi) # imm = 0x1FF
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: xorw $7, %di # sched: [1:0.50]
-; SLM-NEXT: xorw $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: xorw %dx, %di # sched: [1:0.50]
-; SLM-NEXT: xorw %di, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: xorw (%rsi), %di # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_xor_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: xorw $511, %ax # imm = 0x1FF
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: xorw $511, %di # imm = 0x1FF
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: xorw $511, (%rsi) # imm = 0x1FF
-; SANDY-NEXT: # sched: [7:1.00]
-; SANDY-NEXT: xorw $7, %di # sched: [1:0.33]
-; SANDY-NEXT: xorw $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: xorw %dx, %di # sched: [1:0.33]
-; SANDY-NEXT: xorw %di, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: xorw (%rsi), %di # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_xor_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: xorw $511, %ax # imm = 0x1FF
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: xorw $511, %di # imm = 0x1FF
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: xorw $511, (%rsi) # imm = 0x1FF
-; HASWELL-NEXT: # sched: [7:1.00]
-; HASWELL-NEXT: xorw $7, %di # sched: [1:0.25]
-; HASWELL-NEXT: xorw $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: xorw %dx, %di # sched: [1:0.25]
-; HASWELL-NEXT: xorw %di, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: xorw (%rsi), %di # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xor_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: xorw $511, %ax # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: xorw $511, %di # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: xorw $511, (%rsi) # imm = 0x1FF
-; BROADWELL-NEXT: # sched: [7:1.00]
-; BROADWELL-NEXT: xorw $7, %di # sched: [1:0.25]
-; BROADWELL-NEXT: xorw $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: xorw %dx, %di # sched: [1:0.25]
-; BROADWELL-NEXT: xorw %di, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: xorw (%rsi), %di # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xor_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: xorw $511, %ax # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: xorw $511, %di # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: xorw $511, (%rsi) # imm = 0x1FF
-; SKYLAKE-NEXT: # sched: [7:1.00]
-; SKYLAKE-NEXT: xorw $7, %di # sched: [1:0.25]
-; SKYLAKE-NEXT: xorw $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: xorw %dx, %di # sched: [1:0.25]
-; SKYLAKE-NEXT: xorw %di, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: xorw (%rsi), %di # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xor_16:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: xorw $511, %ax # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: xorw $511, %di # imm = 0x1FF
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: xorw $511, (%rsi) # imm = 0x1FF
-; SKX-NEXT: # sched: [7:1.00]
-; SKX-NEXT: xorw $7, %di # sched: [1:0.25]
-; SKX-NEXT: xorw $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: xorw %dx, %di # sched: [1:0.25]
-; SKX-NEXT: xorw %di, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: xorw (%rsi), %di # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_xor_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: xorw $511, %ax # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: xorw $511, %di # imm = 0x1FF
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: xorw $511, (%rsi) # imm = 0x1FF
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: xorw $7, %di # sched: [1:0.50]
-; BDVER2-NEXT: xorw $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: xorw %dx, %di # sched: [1:0.50]
-; BDVER2-NEXT: xorw %di, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: xorw (%rsi), %di # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xor_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: xorw $511, %ax # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: xorw $511, %di # imm = 0x1FF
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: xorw $511, (%rsi) # imm = 0x1FF
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: xorw $7, %di # sched: [1:0.50]
-; BTVER2-NEXT: xorw $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: xorw %dx, %di # sched: [1:0.50]
-; BTVER2-NEXT: xorw %di, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: xorw (%rsi), %di # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xor_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: xorw $511, %ax # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: xorw $511, %di # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: xorw $511, (%rsi) # imm = 0x1FF
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: xorw $7, %di # sched: [1:0.25]
-; ZNVER1-NEXT: xorw $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: xorw %dx, %di # sched: [1:0.25]
-; ZNVER1-NEXT: xorw %di, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: xorw (%rsi), %di # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "xorw $3, %AX \0A\09 xorw $3, $0 \0A\09 xorw $3, $2 \0A\09 xorw $4, $0 \0A\09 xorw $4, $2 \0A\09 xorw $1, $0 \0A\09 xorw $0, $2 \0A\09 xorw $2, $0", "r,r,*m,i,i"(i16 %a0, i16 %a2, i16* %a1, i16 511, i8 7) nounwind
- ret void
-}
-define void @test_xor_32(i32 %a0, i32* %a1, i32 %a2) optsize {
-; GENERIC-LABEL: test_xor_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: xorl $665536, %eax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: xorl $665536, %edi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: xorl $7, %edi # sched: [1:0.33]
-; GENERIC-NEXT: xorl $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: xorl %edx, %edi # sched: [1:0.33]
-; GENERIC-NEXT: xorl %edi, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: xorl (%rsi), %edi # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_xor_32:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: xorl $665536, %eax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: xorl $665536, %edi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: xorl $7, %edi # sched: [1:0.50]
-; ATOM-NEXT: xorl $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: xorl %edx, %edi # sched: [1:0.50]
-; ATOM-NEXT: xorl %edi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: xorl (%rsi), %edi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_xor_32:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: xorl $665536, %eax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: xorl $665536, %edi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: xorl $7, %edi # sched: [1:0.50]
-; SLM-NEXT: xorl $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: xorl %edx, %edi # sched: [1:0.50]
-; SLM-NEXT: xorl %edi, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: xorl (%rsi), %edi # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_xor_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: xorl $665536, %eax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: xorl $665536, %edi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [7:1.00]
-; SANDY-NEXT: xorl $7, %edi # sched: [1:0.33]
-; SANDY-NEXT: xorl $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: xorl %edx, %edi # sched: [1:0.33]
-; SANDY-NEXT: xorl %edi, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: xorl (%rsi), %edi # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_xor_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: xorl $665536, %eax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: xorl $665536, %edi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [7:1.00]
-; HASWELL-NEXT: xorl $7, %edi # sched: [1:0.25]
-; HASWELL-NEXT: xorl $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: xorl %edx, %edi # sched: [1:0.25]
-; HASWELL-NEXT: xorl %edi, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: xorl (%rsi), %edi # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xor_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: xorl $665536, %eax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: xorl $665536, %edi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [7:1.00]
-; BROADWELL-NEXT: xorl $7, %edi # sched: [1:0.25]
-; BROADWELL-NEXT: xorl $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: xorl %edx, %edi # sched: [1:0.25]
-; BROADWELL-NEXT: xorl %edi, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: xorl (%rsi), %edi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xor_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: xorl $665536, %eax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: xorl $665536, %edi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [7:1.00]
-; SKYLAKE-NEXT: xorl $7, %edi # sched: [1:0.25]
-; SKYLAKE-NEXT: xorl $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: xorl %edx, %edi # sched: [1:0.25]
-; SKYLAKE-NEXT: xorl %edi, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: xorl (%rsi), %edi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xor_32:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: xorl $665536, %eax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: xorl $665536, %edi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [7:1.00]
-; SKX-NEXT: xorl $7, %edi # sched: [1:0.25]
-; SKX-NEXT: xorl $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: xorl %edx, %edi # sched: [1:0.25]
-; SKX-NEXT: xorl %edi, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: xorl (%rsi), %edi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_xor_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: xorl $665536, %eax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: xorl $665536, %edi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: xorl $7, %edi # sched: [1:0.50]
-; BDVER2-NEXT: xorl $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: xorl %edx, %edi # sched: [1:0.50]
-; BDVER2-NEXT: xorl %edi, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: xorl (%rsi), %edi # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xor_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: xorl $665536, %eax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: xorl $665536, %edi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: xorl $7, %edi # sched: [1:0.50]
-; BTVER2-NEXT: xorl $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: xorl %edx, %edi # sched: [1:0.50]
-; BTVER2-NEXT: xorl %edi, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: xorl (%rsi), %edi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xor_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: xorl $665536, %eax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: xorl $665536, %edi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: xorl $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: xorl $7, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: xorl $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: xorl %edx, %edi # sched: [1:0.25]
-; ZNVER1-NEXT: xorl %edi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: xorl (%rsi), %edi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "xorl $3, %EAX \0A\09 xorl $3, $0 \0A\09 xorl $3, $2 \0A\09 xorl $4, $0 \0A\09 xorl $4, $2 \0A\09 xorl $1, $0 \0A\09 xorl $0, $2 \0A\09 xorl $2, $0", "r,r,*m,i,i"(i32 %a0, i32 %a2, i32* %a1, i32 665536, i8 7) nounwind
- ret void
-}
-define void @test_xor_64(i64 %a0, i64* %a1, i64 %a2) optsize {
-; GENERIC-LABEL: test_xor_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: xorq $665536, %rax # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: xorq $665536, %rdi # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [1:0.33]
-; GENERIC-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: xorq $7, %rdi # sched: [1:0.33]
-; GENERIC-NEXT: xorq $7, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: xorq %rdx, %rdi # sched: [1:0.33]
-; GENERIC-NEXT: xorq %rdi, (%rsi) # sched: [7:1.00]
-; GENERIC-NEXT: xorq (%rsi), %rdi # sched: [6:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_xor_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: xorq $665536, %rax # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: xorq $665536, %rdi # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:0.50]
-; ATOM-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0
-; ATOM-NEXT: # sched: [1:1.00]
-; ATOM-NEXT: xorq $7, %rdi # sched: [1:0.50]
-; ATOM-NEXT: xorq $7, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: xorq %rdx, %rdi # sched: [1:0.50]
-; ATOM-NEXT: xorq %rdi, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: xorq (%rsi), %rdi # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_xor_64:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: xorq $665536, %rax # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: xorq $665536, %rdi # imm = 0xA27C0
-; SLM-NEXT: # sched: [1:0.50]
-; SLM-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0
-; SLM-NEXT: # sched: [5:2.00]
-; SLM-NEXT: xorq $7, %rdi # sched: [1:0.50]
-; SLM-NEXT: xorq $7, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: xorq %rdx, %rdi # sched: [1:0.50]
-; SLM-NEXT: xorq %rdi, (%rsi) # sched: [5:2.00]
-; SLM-NEXT: xorq (%rsi), %rdi # sched: [4:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-LABEL: test_xor_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: xorq $665536, %rax # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: xorq $665536, %rdi # imm = 0xA27C0
-; SANDY-NEXT: # sched: [1:0.33]
-; SANDY-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0
-; SANDY-NEXT: # sched: [7:1.00]
-; SANDY-NEXT: xorq $7, %rdi # sched: [1:0.33]
-; SANDY-NEXT: xorq $7, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: xorq %rdx, %rdi # sched: [1:0.33]
-; SANDY-NEXT: xorq %rdi, (%rsi) # sched: [7:1.00]
-; SANDY-NEXT: xorq (%rsi), %rdi # sched: [6:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-LABEL: test_xor_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: xorq $665536, %rax # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: xorq $665536, %rdi # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [1:0.25]
-; HASWELL-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0
-; HASWELL-NEXT: # sched: [7:1.00]
-; HASWELL-NEXT: xorq $7, %rdi # sched: [1:0.25]
-; HASWELL-NEXT: xorq $7, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: xorq %rdx, %rdi # sched: [1:0.25]
-; HASWELL-NEXT: xorq %rdi, (%rsi) # sched: [7:1.00]
-; HASWELL-NEXT: xorq (%rsi), %rdi # sched: [6:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xor_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: xorq $665536, %rax # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: xorq $665536, %rdi # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [1:0.25]
-; BROADWELL-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0
-; BROADWELL-NEXT: # sched: [7:1.00]
-; BROADWELL-NEXT: xorq $7, %rdi # sched: [1:0.25]
-; BROADWELL-NEXT: xorq $7, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: xorq %rdx, %rdi # sched: [1:0.25]
-; BROADWELL-NEXT: xorq %rdi, (%rsi) # sched: [7:1.00]
-; BROADWELL-NEXT: xorq (%rsi), %rdi # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xor_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: xorq $665536, %rax # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: xorq $665536, %rdi # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [1:0.25]
-; SKYLAKE-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0
-; SKYLAKE-NEXT: # sched: [7:1.00]
-; SKYLAKE-NEXT: xorq $7, %rdi # sched: [1:0.25]
-; SKYLAKE-NEXT: xorq $7, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: xorq %rdx, %rdi # sched: [1:0.25]
-; SKYLAKE-NEXT: xorq %rdi, (%rsi) # sched: [7:1.00]
-; SKYLAKE-NEXT: xorq (%rsi), %rdi # sched: [6:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xor_64:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: xorq $665536, %rax # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: xorq $665536, %rdi # imm = 0xA27C0
-; SKX-NEXT: # sched: [1:0.25]
-; SKX-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0
-; SKX-NEXT: # sched: [7:1.00]
-; SKX-NEXT: xorq $7, %rdi # sched: [1:0.25]
-; SKX-NEXT: xorq $7, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: xorq %rdx, %rdi # sched: [1:0.25]
-; SKX-NEXT: xorq %rdi, (%rsi) # sched: [7:1.00]
-; SKX-NEXT: xorq (%rsi), %rdi # sched: [6:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-LABEL: test_xor_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: xorq $665536, %rax # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: xorq $665536, %rdi # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [1:0.50]
-; BDVER2-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0
-; BDVER2-NEXT: # sched: [6:1.00]
-; BDVER2-NEXT: xorq $7, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: xorq $7, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: xorq %rdx, %rdi # sched: [1:0.50]
-; BDVER2-NEXT: xorq %rdi, (%rsi) # sched: [6:1.00]
-; BDVER2-NEXT: xorq (%rsi), %rdi # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_xor_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: xorq $665536, %rax # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: xorq $665536, %rdi # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [1:0.50]
-; BTVER2-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0
-; BTVER2-NEXT: # sched: [5:1.00]
-; BTVER2-NEXT: xorq $7, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: xorq $7, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: xorq %rdx, %rdi # sched: [1:0.50]
-; BTVER2-NEXT: xorq %rdi, (%rsi) # sched: [5:1.00]
-; BTVER2-NEXT: xorq (%rsi), %rdi # sched: [4:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_xor_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: xorq $665536, %rax # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: xorq $665536, %rdi # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [1:0.25]
-; ZNVER1-NEXT: xorq $665536, (%rsi) # imm = 0xA27C0
-; ZNVER1-NEXT: # sched: [5:0.50]
-; ZNVER1-NEXT: xorq $7, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: xorq $7, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: xorq %rdx, %rdi # sched: [1:0.25]
-; ZNVER1-NEXT: xorq %rdi, (%rsi) # sched: [5:0.50]
-; ZNVER1-NEXT: xorq (%rsi), %rdi # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm "xorq $3, %RAX \0A\09 xorq $3, $0 \0A\09 xorq $3, $2 \0A\09 xorq $4, $0 \0A\09 xorq $4, $2 \0A\09 xorq $1, $0 \0A\09 xorq $0, $2 \0A\09 xorq $2, $0", "r,r,*m,i,i"(i64 %a0, i64 %a2, i64* %a1, i32 665536, i8 7) nounwind
- ret void
-}
diff --git a/llvm/test/CodeGen/X86/sha-schedule.ll b/llvm/test/CodeGen/X86/sha-schedule.ll
deleted file mode 100644
index 7069bd3a119..00000000000
--- a/llvm/test/CodeGen/X86/sha-schedule.ll
+++ /dev/null
@@ -1,242 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sha | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=GOLDMONT
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=cannonlake | FileCheck %s --check-prefix=CHECK --check-prefix=CANNONLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-;
-; SHA1
-;
-
-define <4 x i32> @test_sha1msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_sha1msg1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GOLDMONT-LABEL: test_sha1msg1:
-; GOLDMONT: # %bb.0:
-; GOLDMONT-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [4:1.00]
-; GOLDMONT-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [7:1.00]
-; GOLDMONT-NEXT: retq # sched: [4:1.00]
-;
-; CANNONLAKE-LABEL: test_sha1msg1:
-; CANNONLAKE: # %bb.0:
-; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [4:0.50]
-; CANNONLAKE-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [10:0.50]
-; CANNONLAKE-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_sha1msg1:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [2:1.00]
-; ZNVER1-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [9:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <4 x i32>, <4 x i32>* %a2
- %2 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a0, <4 x i32> %a1)
- %3 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %2, <4 x i32> %1)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_sha1msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_sha1msg2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GOLDMONT-LABEL: test_sha1msg2:
-; GOLDMONT: # %bb.0:
-; GOLDMONT-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [4:1.00]
-; GOLDMONT-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [7:1.00]
-; GOLDMONT-NEXT: retq # sched: [4:1.00]
-;
-; CANNONLAKE-LABEL: test_sha1msg2:
-; CANNONLAKE: # %bb.0:
-; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [4:0.50]
-; CANNONLAKE-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [10:0.50]
-; CANNONLAKE-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_sha1msg2:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [1:0.50]
-; ZNVER1-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <4 x i32>, <4 x i32>* %a2
- %2 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a0, <4 x i32> %a1)
- %3 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %2, <4 x i32> %1)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_sha1nexte(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_sha1nexte:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: sha1nexte %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: sha1nexte (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GOLDMONT-LABEL: test_sha1nexte:
-; GOLDMONT: # %bb.0:
-; GOLDMONT-NEXT: sha1nexte %xmm1, %xmm0 # sched: [4:1.00]
-; GOLDMONT-NEXT: sha1nexte (%rdi), %xmm0 # sched: [7:1.00]
-; GOLDMONT-NEXT: retq # sched: [4:1.00]
-;
-; CANNONLAKE-LABEL: test_sha1nexte:
-; CANNONLAKE: # %bb.0:
-; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0 # sched: [4:0.50]
-; CANNONLAKE-NEXT: sha1nexte (%rdi), %xmm0 # sched: [10:0.50]
-; CANNONLAKE-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_sha1nexte:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: sha1nexte %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT: sha1nexte (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <4 x i32>, <4 x i32>* %a2
- %2 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a0, <4 x i32> %a1)
- %3 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %2, <4 x i32> %1)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_sha1rnds4:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GOLDMONT-LABEL: test_sha1rnds4:
-; GOLDMONT: # %bb.0:
-; GOLDMONT-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:1.00]
-; GOLDMONT-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [7:1.00]
-; GOLDMONT-NEXT: retq # sched: [4:1.00]
-;
-; CANNONLAKE-LABEL: test_sha1rnds4:
-; CANNONLAKE: # %bb.0:
-; CANNONLAKE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:0.50]
-; CANNONLAKE-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:0.50]
-; CANNONLAKE-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_sha1rnds4:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [6:1.00]
-; ZNVER1-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [13:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <4 x i32>, <4 x i32>* %a2
- %2 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, i8 3)
- %3 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %2, <4 x i32> %1, i8 3)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8)
-
-;
-; SHA256
-;
-
-define <4 x i32> @test_sha256msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_sha256msg1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GOLDMONT-LABEL: test_sha256msg1:
-; GOLDMONT: # %bb.0:
-; GOLDMONT-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [4:1.00]
-; GOLDMONT-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [7:1.00]
-; GOLDMONT-NEXT: retq # sched: [4:1.00]
-;
-; CANNONLAKE-LABEL: test_sha256msg1:
-; CANNONLAKE: # %bb.0:
-; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [4:0.50]
-; CANNONLAKE-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [10:0.50]
-; CANNONLAKE-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_sha256msg1:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [2:1.00]
-; ZNVER1-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [9:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <4 x i32>, <4 x i32>* %a2
- %2 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a0, <4 x i32> %a1)
- %3 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %2, <4 x i32> %1)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_sha256msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_sha256msg2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GOLDMONT-LABEL: test_sha256msg2:
-; GOLDMONT: # %bb.0:
-; GOLDMONT-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [4:1.00]
-; GOLDMONT-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [7:1.00]
-; GOLDMONT-NEXT: retq # sched: [4:1.00]
-;
-; CANNONLAKE-LABEL: test_sha256msg2:
-; CANNONLAKE: # %bb.0:
-; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [4:0.50]
-; CANNONLAKE-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [10:0.50]
-; CANNONLAKE-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_sha256msg2:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <4 x i32>, <4 x i32>* %a2
- %2 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a0, <4 x i32> %a1)
- %3 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %2, <4 x i32> %1)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>)
-
-define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> *%a3) {
-; GENERIC-LABEL: test_sha256rnds2:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
-; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [5:1.00]
-; GENERIC-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [11:1.00]
-; GENERIC-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; GOLDMONT-LABEL: test_sha256rnds2:
-; GOLDMONT: # %bb.0:
-; GOLDMONT-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50]
-; GOLDMONT-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50]
-; GOLDMONT-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:1.00]
-; GOLDMONT-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [7:1.00]
-; GOLDMONT-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50]
-; GOLDMONT-NEXT: retq # sched: [4:1.00]
-;
-; CANNONLAKE-LABEL: test_sha256rnds2:
-; CANNONLAKE: # %bb.0:
-; CANNONLAKE-NEXT: vmovaps %xmm0, %xmm3 # sched: [1:0.33]
-; CANNONLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33]
-; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:0.50]
-; CANNONLAKE-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:0.50]
-; CANNONLAKE-NEXT: vmovaps %xmm3, %xmm0 # sched: [1:0.33]
-; CANNONLAKE-NEXT: retq # sched: [7:1.00]
-;
-; ZNVER1-LABEL: test_sha256rnds2:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovaps %xmm0, %xmm3 # sched: [1:0.25]
-; ZNVER1-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:1.00]
-; ZNVER1-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [11:1.00]
-; ZNVER1-NEXT: vmovaps %xmm3, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <4 x i32>, <4 x i32>* %a3
- %2 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2)
- %3 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %2, <4 x i32> %1, <4 x i32> %a2)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>)
diff --git a/llvm/test/CodeGen/X86/sse-schedule.ll b/llvm/test/CodeGen/X86/sse-schedule.ll
deleted file mode 100644
index f737d24c879..00000000000
--- a/llvm/test/CodeGen/X86/sse-schedule.ll
+++ /dev/null
@@ -1,6975 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
-
-; FIXME: we should really use -mattr=-sse2 here but some of the comparison tests don't work without access to legal <4 x i32> types.
-
-define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_addps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_addps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: addps (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_addps:
-; SLM: # %bb.0:
-; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: addps (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_addps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_addps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_addps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_addps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_addps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: addps (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_addps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_addps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_addps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_addps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_addps:
-; SKX: # %bb.0:
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_addps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_addps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_addps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: addps (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_addps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_addps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_addps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fadd <4 x float> %a0, %a1
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = fadd <4 x float> %1, %2
- ret <4 x float> %3
-}
-
-define float @test_addss(float %a0, float %a1, float *%a2) {
-; GENERIC-LABEL: test_addss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: addss (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_addss:
-; ATOM: # %bb.0:
-; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: addss (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_addss:
-; SLM: # %bb.0:
-; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: addss (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_addss:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_addss:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_addss:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: addss (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_addss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_addss:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: addss (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_addss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_addss:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_addss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_addss:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_addss:
-; SKX: # %bb.0:
-; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_addss:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: addss (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_addss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_addss:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: addss (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_addss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_addss:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: addss (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_addss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fadd float %a0, %a1
- %2 = load float, float *%a2, align 4
- %3 = fadd float %1, %2
- ret float %3
-}
-
-define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_andps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_andps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: andps (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_andps:
-; SLM: # %bb.0:
-; SLM-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: andps (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_andps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_andps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_andps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_andps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_andps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: andps (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_andps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_andps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_andps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_andps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_andps:
-; SKX: # %bb.0:
-; SKX-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_andps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: andps %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_andps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_andps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: andps (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_andps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_andps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: andps (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_andps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast <4 x float> %a0 to <4 x i32>
- %2 = bitcast <4 x float> %a1 to <4 x i32>
- %3 = and <4 x i32> %1, %2
- %4 = load <4 x float>, <4 x float> *%a2, align 16
- %5 = bitcast <4 x float> %4 to <4 x i32>
- %6 = and <4 x i32> %3, %5
- %7 = bitcast <4 x i32> %6 to <4 x float>
- ret <4 x float> %7
-}
-
-define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_andnotps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_andnotps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: andnps (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_andnotps:
-; SLM: # %bb.0:
-; SLM-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: andnps (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_andnotps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_andnotps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_andnotps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_andnotps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_andnotps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_andnotps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_andnotps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_andnotps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_andnotps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_andnotps:
-; SKX: # %bb.0:
-; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_andnotps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_andnotps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_andnotps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_andnotps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_andnotps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_andnotps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast <4 x float> %a0 to <4 x i32>
- %2 = bitcast <4 x float> %a1 to <4 x i32>
- %3 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
- %4 = and <4 x i32> %3, %2
- %5 = load <4 x float>, <4 x float> *%a2, align 16
- %6 = bitcast <4 x float> %5 to <4 x i32>
- %7 = xor <4 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1>
- %8 = and <4 x i32> %6, %7
- %9 = bitcast <4 x i32> %8 to <4 x float>
- ret <4 x float> %9
-}
-
-define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_cmpps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmpps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cmpeqps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT: cmpeqps (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmpps:
-; SLM: # %bb.0:
-; SLM-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: cmpeqps (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cmpps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cmpps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cmpps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cmpps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cmpps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmpps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cmpps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmpps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cmpps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmpps:
-; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cmpps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [2:1.00]
-; BDVER2-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cmpps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
-; BDVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cmpps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cmpps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cmpps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cmpps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fcmp oeq <4 x float> %a0, %a1
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = fcmp oeq <4 x float> %a0, %2
- %4 = sext <4 x i1> %1 to <4 x i32>
- %5 = sext <4 x i1> %3 to <4 x i32>
- %6 = or <4 x i32> %4, %5
- %7 = bitcast <4 x i32> %6 to <4 x float>
- ret <4 x float> %7
-}
-
-define float @test_cmpss(float %a0, float %a1, float *%a2) {
-; GENERIC-LABEL: test_cmpss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmpss:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cmpeqss %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: cmpeqss (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmpss:
-; SLM: # %bb.0:
-; SLM-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: cmpeqss (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cmpss:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cmpss:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cmpss:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cmpss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cmpss:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmpss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cmpss:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmpss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cmpss:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmpss:
-; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cmpss:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [2:1.00]
-; BDVER2-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cmpss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BDVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cmpss:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cmpss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cmpss:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cmpss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = insertelement <4 x float> undef, float %a0, i32 0
- %2 = insertelement <4 x float> undef, float %a1, i32 0
- %3 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %1, <4 x float> %2, i8 0)
- %4 = load float, float *%a2, align 4
- %5 = insertelement <4 x float> undef, float %4, i32 0
- %6 = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %3, <4 x float> %5, i8 0)
- %7 = extractelement <4 x float> %6, i32 0
- ret float %7
-}
-declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone
-
-define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_comiss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: setnp %al # sched: [1:0.50]
-; GENERIC-NEXT: sete %cl # sched: [1:0.50]
-; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33]
-; GENERIC-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: setnp %al # sched: [1:0.50]
-; GENERIC-NEXT: sete %dl # sched: [1:0.50]
-; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33]
-; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33]
-; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_comiss:
-; ATOM: # %bb.0:
-; ATOM-NEXT: comiss %xmm1, %xmm0 # sched: [9:4.50]
-; ATOM-NEXT: setnp %al # sched: [1:0.50]
-; ATOM-NEXT: sete %cl # sched: [1:0.50]
-; ATOM-NEXT: andb %al, %cl # sched: [1:0.50]
-; ATOM-NEXT: comiss (%rdi), %xmm0 # sched: [10:5.00]
-; ATOM-NEXT: setnp %al # sched: [1:0.50]
-; ATOM-NEXT: sete %dl # sched: [1:0.50]
-; ATOM-NEXT: andb %al, %dl # sched: [1:0.50]
-; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50]
-; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_comiss:
-; SLM: # %bb.0:
-; SLM-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: setnp %al # sched: [1:0.50]
-; SLM-NEXT: sete %cl # sched: [1:0.50]
-; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
-; SLM-NEXT: comiss (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: setnp %al # sched: [1:0.50]
-; SLM-NEXT: sete %dl # sched: [1:0.50]
-; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
-; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
-; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_comiss:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50]
-; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
-; SANDY-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50]
-; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
-; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
-; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_comiss:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: setnp %al # sched: [1:0.50]
-; SANDY-NEXT: sete %cl # sched: [1:0.50]
-; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
-; SANDY-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-NEXT: setnp %al # sched: [1:0.50]
-; SANDY-NEXT: sete %dl # sched: [1:0.50]
-; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
-; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
-; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_comiss:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
-; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
-; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; HASWELL-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
-; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
-; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_comiss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: setnp %al # sched: [1:0.50]
-; HASWELL-NEXT: sete %cl # sched: [1:0.50]
-; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
-; HASWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: setnp %al # sched: [1:0.50]
-; HASWELL-NEXT: sete %dl # sched: [1:0.50]
-; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
-; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
-; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_comiss:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_comiss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
-; BROADWELL-NEXT: sete %cl # sched: [1:0.50]
-; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25]
-; BROADWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
-; BROADWELL-NEXT: sete %dl # sched: [1:0.50]
-; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25]
-; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
-; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_comiss:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_comiss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
-; SKYLAKE-NEXT: sete %cl # sched: [1:0.50]
-; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKYLAKE-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
-; SKYLAKE-NEXT: sete %dl # sched: [1:0.50]
-; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25]
-; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_comiss:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SKX-SSE-NEXT: sete %cl # sched: [1:0.50]
-; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKX-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SKX-SSE-NEXT: sete %dl # sched: [1:0.50]
-; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_comiss:
-; SKX: # %bb.0:
-; SKX-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: setnp %al # sched: [1:0.50]
-; SKX-NEXT: sete %cl # sched: [1:0.50]
-; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKX-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-NEXT: setnp %al # sched: [1:0.50]
-; SKX-NEXT: sete %dl # sched: [1:0.50]
-; SKX-NEXT: andb %al, %dl # sched: [1:0.25]
-; SKX-NEXT: orb %cl, %dl # sched: [1:0.25]
-; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_comiss:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [6:1.00]
-; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_comiss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-NEXT: setnp %al # sched: [1:0.50]
-; BDVER2-NEXT: sete %cl # sched: [1:0.50]
-; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50]
-; BDVER2-NEXT: vcomiss (%rdi), %xmm0 # sched: [6:1.00]
-; BDVER2-NEXT: setnp %al # sched: [1:0.50]
-; BDVER2-NEXT: sete %dl # sched: [1:0.50]
-; BDVER2-NEXT: andb %al, %dl # sched: [1:0.50]
-; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
-; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_comiss:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_comiss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: setnp %al # sched: [1:0.50]
-; BTVER2-NEXT: sete %cl # sched: [1:0.50]
-; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
-; BTVER2-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: setnp %al # sched: [1:0.50]
-; BTVER2-NEXT: sete %dl # sched: [1:0.50]
-; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
-; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
-; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_comiss:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_comiss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
-; ZNVER1-NEXT: sete %cl # sched: [1:0.25]
-; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
-; ZNVER1-NEXT: vcomiss (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
-; ZNVER1-NEXT: sete %dl # sched: [1:0.25]
-; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25]
-; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25]
-; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1)
- %2 = load <4 x float>, <4 x float> *%a2, align 4
- %3 = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %2)
- %4 = or i32 %1, %3
- ret i32 %4
-}
-declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone
-
-define float @test_cvtsi2ss(i32 %a0, i32 *%a1) {
-; GENERIC-LABEL: test_cvtsi2ss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00]
-; GENERIC-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00]
-; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtsi2ss:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtsi2ss:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:0.50]
-; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtsi2ss:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00]
-; SANDY-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00]
-; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtsi2ss:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
-; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
-; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtsi2ss:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:1.00]
-; HASWELL-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtsi2ss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtsi2ss:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
-; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtsi2ss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtsi2ss:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
-; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtsi2ss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtsi2ss:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
-; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtsi2ss:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtsi2ss:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [14:1.00]
-; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtsi2ss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [14:1.00]
-; BDVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtsi2ss:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00]
-; BTVER2-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [10:1.00]
-; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtsi2ss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [10:1.00]
-; BTVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtsi2ss:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtsi2ss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sitofp i32 %a0 to float
- %2 = load i32, i32 *%a1, align 4
- %3 = sitofp i32 %2 to float
- %4 = fadd float %1, %3
- ret float %4
-}
-
-define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) {
-; GENERIC-LABEL: test_cvtsi2ssq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
-; GENERIC-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00]
-; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtsi2ssq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT: addss %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtsi2ssq:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [4:0.50]
-; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtsi2ssq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
-; SANDY-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00]
-; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtsi2ssq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
-; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
-; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtsi2ssq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
-; HASWELL-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtsi2ssq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
-; HASWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtsi2ssq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
-; BROADWELL-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
-; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtsi2ssq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
-; BROADWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtsi2ssq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00]
-; SKYLAKE-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
-; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtsi2ssq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
-; SKYLAKE-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtsi2ssq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00]
-; SKX-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
-; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtsi2ssq:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00]
-; SKX-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtsi2ssq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [13:1.00]
-; BDVER2-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtsi2ssq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [14:1.00]
-; BDVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtsi2ssq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00]
-; BTVER2-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [10:1.00]
-; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtsi2ssq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [10:1.00]
-; BTVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtsi2ssq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtsi2ssq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sitofp i64 %a0 to float
- %2 = load i64, i64 *%a1, align 8
- %3 = sitofp i64 %2 to float
- %4 = fadd float %1, %3
- ret float %4
-}
-
-define i32 @test_cvtss2si(float %a0, float *%a1) {
-; GENERIC-LABEL: test_cvtss2si:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00]
-; GENERIC-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtss2si:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtss2si (%rdi), %eax # sched: [9:4.50]
-; ATOM-NEXT: cvtss2si %xmm0, %ecx # sched: [8:4.00]
-; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtss2si:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtss2si (%rdi), %eax # sched: [7:1.00]
-; SLM-NEXT: cvtss2si %xmm0, %ecx # sched: [4:0.50]
-; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtss2si:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00]
-; SANDY-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00]
-; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtss2si:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00]
-; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [10:1.00]
-; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtss2si:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00]
-; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtss2si:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [9:1.00]
-; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtss2si:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00]
-; BROADWELL-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtss2si:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [9:1.00]
-; BROADWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00]
-; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtss2si:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtss2si:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00]
-; SKYLAKE-NEXT: vcvtss2si (%rdi), %eax # sched: [11:1.00]
-; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtss2si:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [6:1.00]
-; SKX-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [11:1.00]
-; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtss2si:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00]
-; SKX-NEXT: vcvtss2si (%rdi), %eax # sched: [11:1.00]
-; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtss2si:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [18:1.00]
-; BDVER2-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [13:1.00]
-; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtss2si:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [18:1.00]
-; BDVER2-NEXT: vcvtss2si %xmm0, %ecx # sched: [13:1.00]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtss2si:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [12:1.00]
-; BTVER2-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [7:1.00]
-; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtss2si:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [12:1.00]
-; BTVER2-NEXT: vcvtss2si %xmm0, %ecx # sched: [7:1.00]
-; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtss2si:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtss2si:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtss2si (%rdi), %eax # sched: [12:1.00]
-; ZNVER1-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00]
-; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = insertelement <4 x float> undef, float %a0, i32 0
- %2 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %1)
- %3 = load float, float *%a1, align 4
- %4 = insertelement <4 x float> undef, float %3, i32 0
- %5 = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %4)
- %6 = add i32 %2, %5
- ret i32 %6
-}
-declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone
-
-define i64 @test_cvtss2siq(float %a0, float *%a1) {
-; GENERIC-LABEL: test_cvtss2siq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00]
-; GENERIC-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtss2siq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtss2si (%rdi), %rax # sched: [10:5.00]
-; ATOM-NEXT: cvtss2si %xmm0, %rcx # sched: [9:4.50]
-; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtss2siq:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtss2si (%rdi), %rax # sched: [7:1.00]
-; SLM-NEXT: cvtss2si %xmm0, %rcx # sched: [4:0.50]
-; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtss2siq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00]
-; SANDY-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00]
-; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtss2siq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00]
-; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [10:1.00]
-; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtss2siq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00]
-; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtss2siq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [9:1.00]
-; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtss2siq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00]
-; BROADWELL-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtss2siq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [9:1.00]
-; BROADWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00]
-; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtss2siq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtss2siq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00]
-; SKYLAKE-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00]
-; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtss2siq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [7:1.00]
-; SKX-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [11:1.00]
-; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtss2siq:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [7:1.00]
-; SKX-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00]
-; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtss2siq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [18:1.00]
-; BDVER2-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [13:1.00]
-; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtss2siq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [18:1.00]
-; BDVER2-NEXT: vcvtss2si %xmm0, %rcx # sched: [13:1.00]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtss2siq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [12:1.00]
-; BTVER2-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [7:1.00]
-; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtss2siq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [12:1.00]
-; BTVER2-NEXT: vcvtss2si %xmm0, %rcx # sched: [7:1.00]
-; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtss2siq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtss2siq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtss2si (%rdi), %rax # sched: [12:1.00]
-; ZNVER1-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00]
-; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = insertelement <4 x float> undef, float %a0, i32 0
- %2 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %1)
- %3 = load float, float *%a1, align 4
- %4 = insertelement <4 x float> undef, float %3, i32 0
- %5 = call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %4)
- %6 = add i64 %2, %5
- ret i64 %6
-}
-declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone
-
-define i32 @test_cvttss2si(float %a0, float *%a1) {
-; GENERIC-LABEL: test_cvttss2si:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00]
-; GENERIC-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvttss2si:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvttss2si (%rdi), %eax # sched: [9:4.50]
-; ATOM-NEXT: cvttss2si %xmm0, %ecx # sched: [8:4.00]
-; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvttss2si:
-; SLM: # %bb.0:
-; SLM-NEXT: cvttss2si (%rdi), %eax # sched: [7:1.00]
-; SLM-NEXT: cvttss2si %xmm0, %ecx # sched: [4:0.50]
-; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvttss2si:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00]
-; SANDY-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00]
-; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvttss2si:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
-; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00]
-; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvttss2si:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00]
-; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvttss2si:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [9:1.00]
-; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvttss2si:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00]
-; BROADWELL-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvttss2si:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [9:1.00]
-; BROADWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00]
-; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvttss2si:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvttss2si:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00]
-; SKYLAKE-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00]
-; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvttss2si:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [6:1.00]
-; SKX-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [11:1.00]
-; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvttss2si:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [6:1.00]
-; SKX-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00]
-; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvttss2si:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [18:1.00]
-; BDVER2-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [13:1.00]
-; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvttss2si:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [18:1.00]
-; BDVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [13:1.00]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvttss2si:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [12:1.00]
-; BTVER2-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [7:1.00]
-; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvttss2si:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [12:1.00]
-; BTVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00]
-; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvttss2si:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvttss2si:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvttss2si (%rdi), %eax # sched: [12:1.00]
-; ZNVER1-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
-; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fptosi float %a0 to i32
- %2 = load float, float *%a1, align 4
- %3 = fptosi float %2 to i32
- %4 = add i32 %1, %3
- ret i32 %4
-}
-
-define i64 @test_cvttss2siq(float %a0, float *%a1) {
-; GENERIC-LABEL: test_cvttss2siq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00]
-; GENERIC-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvttss2siq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvttss2si (%rdi), %rax # sched: [10:5.00]
-; ATOM-NEXT: cvttss2si %xmm0, %rcx # sched: [9:4.50]
-; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvttss2siq:
-; SLM: # %bb.0:
-; SLM-NEXT: cvttss2si (%rdi), %rax # sched: [7:1.00]
-; SLM-NEXT: cvttss2si %xmm0, %rcx # sched: [4:0.50]
-; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvttss2siq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00]
-; SANDY-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00]
-; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvttss2siq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
-; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00]
-; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvttss2siq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [10:1.00]
-; HASWELL-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvttss2siq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [9:1.00]
-; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvttss2siq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [10:1.00]
-; BROADWELL-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvttss2siq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [9:1.00]
-; BROADWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00]
-; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvttss2siq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00]
-; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvttss2siq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00]
-; SKYLAKE-NEXT: vcvttss2si (%rdi), %rax # sched: [11:1.00]
-; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvttss2siq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [7:1.00]
-; SKX-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00]
-; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvttss2siq:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00]
-; SKX-NEXT: vcvttss2si (%rdi), %rax # sched: [11:1.00]
-; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvttss2siq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [18:1.00]
-; BDVER2-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [13:1.00]
-; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvttss2siq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [18:1.00]
-; BDVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [13:1.00]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvttss2siq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00]
-; BTVER2-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [7:1.00]
-; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvttss2siq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [12:1.00]
-; BTVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00]
-; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvttss2siq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvttss2siq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvttss2si (%rdi), %rax # sched: [12:1.00]
-; ZNVER1-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
-; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fptosi float %a0 to i64
- %2 = load float, float *%a1, align 4
- %3 = fptosi float %2 to i64
- %4 = add i64 %1, %3
- ret i64 %4
-}
-
-define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_divps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: divps %xmm1, %xmm0 # sched: [14:14.00]
-; GENERIC-NEXT: divps (%rdi), %xmm0 # sched: [20:14.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_divps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: divps %xmm1, %xmm0 # sched: [70:35.00]
-; ATOM-NEXT: divps (%rdi), %xmm0 # sched: [70:35.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_divps:
-; SLM: # %bb.0:
-; SLM-NEXT: divps %xmm1, %xmm0 # sched: [39:39.00]
-; SLM-NEXT: divps (%rdi), %xmm0 # sched: [42:39.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_divps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: divps %xmm1, %xmm0 # sched: [14:14.00]
-; SANDY-SSE-NEXT: divps (%rdi), %xmm0 # sched: [20:14.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_divps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:14.00]
-; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:14.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_divps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: divps %xmm1, %xmm0 # sched: [13:7.00]
-; HASWELL-SSE-NEXT: divps (%rdi), %xmm0 # sched: [19:7.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_divps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:7.00]
-; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [19:7.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_divps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:5.00]
-; BROADWELL-SSE-NEXT: divps (%rdi), %xmm0 # sched: [16:5.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_divps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:5.00]
-; BROADWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:5.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_divps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:3.00]
-; SKYLAKE-SSE-NEXT: divps (%rdi), %xmm0 # sched: [17:5.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_divps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
-; SKYLAKE-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:5.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_divps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:3.00]
-; SKX-SSE-NEXT: divps (%rdi), %xmm0 # sched: [17:5.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_divps:
-; SKX: # %bb.0:
-; SKX-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
-; SKX-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:5.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_divps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: divps %xmm1, %xmm0 # sched: [9:9.50]
-; BDVER2-SSE-NEXT: divps (%rdi), %xmm0 # sched: [14:9.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_divps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [9:9.50]
-; BDVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [14:9.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_divps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: divps %xmm1, %xmm0 # sched: [19:19.00]
-; BTVER2-SSE-NEXT: divps (%rdi), %xmm0 # sched: [24:19.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_divps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
-; BTVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_divps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: divps %xmm1, %xmm0 # sched: [15:1.00]
-; ZNVER1-SSE-NEXT: divps (%rdi), %xmm0 # sched: [22:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_divps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
-; ZNVER1-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fdiv <4 x float> %a0, %a1
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = fdiv <4 x float> %1, %2
- ret <4 x float> %3
-}
-
-define float @test_divss(float %a0, float %a1, float *%a2) {
-; GENERIC-LABEL: test_divss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: divss %xmm1, %xmm0 # sched: [14:14.00]
-; GENERIC-NEXT: divss (%rdi), %xmm0 # sched: [20:14.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_divss:
-; ATOM: # %bb.0:
-; ATOM-NEXT: divss %xmm1, %xmm0 # sched: [34:17.00]
-; ATOM-NEXT: divss (%rdi), %xmm0 # sched: [34:17.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_divss:
-; SLM: # %bb.0:
-; SLM-NEXT: divss %xmm1, %xmm0 # sched: [19:17.00]
-; SLM-NEXT: divss (%rdi), %xmm0 # sched: [22:17.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_divss:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: divss %xmm1, %xmm0 # sched: [14:14.00]
-; SANDY-SSE-NEXT: divss (%rdi), %xmm0 # sched: [20:14.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_divss:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:14.00]
-; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:14.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_divss:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: divss %xmm1, %xmm0 # sched: [13:7.00]
-; HASWELL-SSE-NEXT: divss (%rdi), %xmm0 # sched: [18:7.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_divss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:7.00]
-; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [18:7.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_divss:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:3.00]
-; BROADWELL-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:5.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_divss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
-; BROADWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:5.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_divss:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:3.00]
-; SKYLAKE-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:3.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_divss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
-; SKYLAKE-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:3.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_divss:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:3.00]
-; SKX-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:3.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_divss:
-; SKX: # %bb.0:
-; SKX-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
-; SKX-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:3.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_divss:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: divss %xmm1, %xmm0 # sched: [9:9.50]
-; BDVER2-SSE-NEXT: divss (%rdi), %xmm0 # sched: [14:9.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_divss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [9:9.50]
-; BDVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [14:9.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_divss:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: divss %xmm1, %xmm0 # sched: [19:19.00]
-; BTVER2-SSE-NEXT: divss (%rdi), %xmm0 # sched: [24:19.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_divss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
-; BTVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_divss:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: divss %xmm1, %xmm0 # sched: [15:1.00]
-; ZNVER1-SSE-NEXT: divss (%rdi), %xmm0 # sched: [22:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_divss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
-; ZNVER1-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fdiv float %a0, %a1
- %2 = load float, float *%a2, align 4
- %3 = fdiv float %1, %2
- ret float %3
-}
-
-define void @test_ldmxcsr(i32 %a0) {
-; GENERIC-LABEL: test_ldmxcsr:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; GENERIC-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_ldmxcsr:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; ATOM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:2.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_ldmxcsr:
-; SLM: # %bb.0:
-; SLM-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; SLM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_ldmxcsr:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; SANDY-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_ldmxcsr:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_ldmxcsr:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_ldmxcsr:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_ldmxcsr:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ldmxcsr:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; BROADWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_ldmxcsr:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ldmxcsr:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; SKYLAKE-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_ldmxcsr:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; SKX-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_ldmxcsr:
-; SKX: # %bb.0:
-; SKX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; SKX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_ldmxcsr:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; BDVER2-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_ldmxcsr:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; BDVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_ldmxcsr:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; BTVER2-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_ldmxcsr:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; BTVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_ldmxcsr:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_ldmxcsr:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50]
-; ZNVER1-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = alloca i32, align 4
- %2 = bitcast i32* %1 to i8*
- store i32 %a0, i32* %1
- call void @llvm.x86.sse.ldmxcsr(i8* %2)
- ret void
-}
-declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone
-
-define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_maxps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_maxps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: maxps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: maxps (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_maxps:
-; SLM: # %bb.0:
-; SLM-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: maxps (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_maxps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_maxps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_maxps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_maxps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_maxps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_maxps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_maxps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_maxps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_maxps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_maxps:
-; SKX: # %bb.0:
-; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_maxps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [2:1.00]
-; BDVER2-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_maxps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BDVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_maxps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_maxps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_maxps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_maxps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %1, <4 x float> %2)
- ret <4 x float> %3
-}
-declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
-
-define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_maxss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_maxss:
-; ATOM: # %bb.0:
-; ATOM-NEXT: maxss %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: maxss (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_maxss:
-; SLM: # %bb.0:
-; SLM-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: maxss (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_maxss:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_maxss:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_maxss:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_maxss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_maxss:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_maxss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_maxss:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_maxss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_maxss:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_maxss:
-; SKX: # %bb.0:
-; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_maxss:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [2:1.00]
-; BDVER2-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_maxss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BDVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_maxss:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_maxss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_maxss:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_maxss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %1, <4 x float> %2)
- ret <4 x float> %3
-}
-declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
-
-define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_minps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_minps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: minps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: minps (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_minps:
-; SLM: # %bb.0:
-; SLM-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: minps (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_minps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_minps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_minps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_minps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_minps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: minps (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_minps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_minps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_minps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_minps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_minps:
-; SKX: # %bb.0:
-; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_minps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: minps %xmm1, %xmm0 # sched: [2:1.00]
-; BDVER2-SSE-NEXT: minps (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_minps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BDVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_minps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: minps %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: minps (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_minps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_minps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_minps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %1, <4 x float> %2)
- ret <4 x float> %3
-}
-declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
-
-define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_minss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: minss (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_minss:
-; ATOM: # %bb.0:
-; ATOM-NEXT: minss %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: minss (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_minss:
-; SLM: # %bb.0:
-; SLM-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: minss (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_minss:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_minss:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_minss:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: minss (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_minss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_minss:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: minss (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_minss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_minss:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_minss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_minss:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_minss:
-; SKX: # %bb.0:
-; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_minss:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: minss %xmm1, %xmm0 # sched: [2:1.00]
-; BDVER2-SSE-NEXT: minss (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_minss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BDVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_minss:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: minss %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: minss (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_minss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_minss:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: minss (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_minss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %1, <4 x float> %2)
- ret <4 x float> %3
-}
-declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
-
-define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_movaps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movaps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movaps (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: addps %xmm0, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movaps:
-; SLM: # %bb.0:
-; SLM-NEXT: movaps (%rdi), %xmm0 # sched: [3:1.00]
-; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movaps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movaps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movaps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movaps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movaps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movaps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movaps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movaps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movaps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movaps:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movaps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movaps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movaps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movaps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movaps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movaps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovaps (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <4 x float>, <4 x float> *%a0, align 16
- %2 = fadd <4 x float> %1, %1
- store <4 x float> %2, <4 x float> *%a1, align 16
- ret void
-}
-
-; TODO (v)movhlps
-
-define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) {
-; GENERIC-LABEL: test_movhlps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movhlps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movhlps:
-; SLM: # %bb.0:
-; SLM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movhlps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movhlps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movhlps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movhlps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movhlps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movhlps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movhlps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movhlps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movhlps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movhlps:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movhlps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movhlps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movhlps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movhlps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movhlps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movhlps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
- ret <4 x float> %1
-}
-
-; TODO (v)movhps
-
-define <4 x float> @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
-; GENERIC-LABEL: test_movhps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movhps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
-; ATOM-NEXT: addps %xmm1, %xmm2 # sched: [5:5.00]
-; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: movhps %xmm2, (%rdi) # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movhps:
-; SLM: # %bb.0:
-; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
-; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
-; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movhps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movhps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movhps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movhps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movhps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movhps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movhps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movhps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movhps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movhps:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movhps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movhps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movhps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movhps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movhps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movhps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast x86_mmx* %a2 to <2 x float>*
- %2 = load <2 x float>, <2 x float> *%1, align 8
- %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
- %5 = fadd <4 x float> %a0, %4
- %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 2, i32 3>
- store <2 x float> %6, <2 x float>* %1
- ret <4 x float> %4
-}
-
-; TODO (v)movlhps
-
-define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
-; GENERIC-LABEL: test_movlhps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movlhps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movlhps:
-; SLM: # %bb.0:
-; SLM-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movlhps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movlhps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movlhps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movlhps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movlhps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movlhps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movlhps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movlhps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movlhps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movlhps:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movlhps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [2:0.50]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movlhps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [2:0.50]
-; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movlhps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movlhps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movlhps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movlhps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
-; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
- %2 = fadd <4 x float> %a1, %1
- ret <4 x float> %2
-}
-
-define <4 x float> @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
-; GENERIC-LABEL: test_movlps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movlps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
-; ATOM-NEXT: addps %xmm1, %xmm2 # sched: [5:5.00]
-; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: movlps %xmm2, (%rdi) # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movlps:
-; SLM: # %bb.0:
-; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
-; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
-; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movlps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movlps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movlps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movlps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movlps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movlps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movlps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movlps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movlps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movlps:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movlps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:0.50]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movlps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:0.50]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movlps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movlps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movlps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movlps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast x86_mmx* %a2 to <2 x float>*
- %2 = load <2 x float>, <2 x float> *%1, align 8
- %3 = shufflevector <2 x float> %2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %4 = shufflevector <4 x float> %a1, <4 x float> %3, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
- %5 = fadd <4 x float> %a0, %4
- %6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 0, i32 1>
- store <2 x float> %6, <2 x float>* %1
- ret <4 x float> %4
-}
-
-define i32 @test_movmskps(<4 x float> %a0) {
-; GENERIC-LABEL: test_movmskps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movmskps %xmm0, %eax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movmskps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movmskps %xmm0, %eax # sched: [3:3.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movmskps:
-; SLM: # %bb.0:
-; SLM-NEXT: movmskps %xmm0, %eax # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movmskps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movmskps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movmskps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movmskps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movmskps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movmskps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movmskps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movmskps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movmskps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movmskps:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movmskps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movmskps %xmm0, %eax # sched: [10:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movmskps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovmskps %xmm0, %eax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movmskps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movmskps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movmskps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movmskps %xmm0, %eax # sched: [1:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movmskps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovmskps %xmm0, %eax # sched: [1:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
- ret i32 %1
-}
-declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
-
-define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_movntps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movntps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movntps:
-; SLM: # %bb.0:
-; SLM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movntps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movntps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movntps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movntps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movntps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movntps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movntps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movntps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movntps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movntps:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movntps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [3:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movntps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [3:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movntps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movntps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movntps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movntps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- store <4 x float> %a0, <4 x float> *%a1, align 16, !nontemporal !0
- ret void
-}
-
-define void @test_movss_mem(float* %a0, float* %a1) {
-; GENERIC-LABEL: test_movss_mem:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
-; GENERIC-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movss_mem:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [1:1.00]
-; ATOM-NEXT: addss %xmm0, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movss_mem:
-; SLM: # %bb.0:
-; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00]
-; SLM-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movss_mem:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movss_mem:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movss_mem:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movss_mem:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movss_mem:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; BROADWELL-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movss_mem:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; BROADWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movss_mem:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKYLAKE-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movss_mem:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKYLAKE-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movss_mem:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movss_mem:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movss_mem:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-SSE-NEXT: addss %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: movss %xmm0, (%rsi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movss_mem:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [2:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movss_mem:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movss %xmm0, (%rsi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movss_mem:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movss_mem:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movss_mem:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
-; ZNVER1-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovss %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load float, float* %a0, align 1
- %2 = fadd float %1, %1
- store float %2, float *%a1, align 1
- ret void
-}
-
-define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) {
-; GENERIC-LABEL: test_movss_reg:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movss_reg:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movss_reg:
-; SLM: # %bb.0:
-; SLM-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movss_reg:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movss_reg:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movss_reg:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movss_reg:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movss_reg:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movss_reg:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movss_reg:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movss_reg:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movss_reg:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movss_reg:
-; SKX: # %bb.0:
-; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movss_reg:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movss_reg:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movss_reg:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movss_reg:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movss_reg:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movss_reg:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
- ret <4 x float> %1
-}
-
-define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_movups:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movups:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT: addps %xmm0, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: movups %xmm0, (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movups:
-; SLM: # %bb.0:
-; SLM-NEXT: movups (%rdi), %xmm0 # sched: [3:1.00]
-; SLM-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movups:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movups:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movups:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movups:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movups:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movups (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movups:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movups:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movups:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movups:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movups:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movups:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movups (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movups:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movups:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movups (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movups:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movups:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movups (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movups:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovups (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovups %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <4 x float>, <4 x float> *%a0, align 1
- %2 = fadd <4 x float> %1, %1
- store <4 x float> %2, <4 x float> *%a1, align 1
- ret void
-}
-
-define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_mulps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_mulps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: mulps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: mulps (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_mulps:
-; SLM: # %bb.0:
-; SLM-NEXT: mulps %xmm1, %xmm0 # sched: [5:2.00]
-; SLM-NEXT: mulps (%rdi), %xmm0 # sched: [8:2.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_mulps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_mulps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_mulps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [11:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_mulps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_mulps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [3:0.50]
-; BROADWELL-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [8:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mulps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BROADWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_mulps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mulps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_mulps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_mulps:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_mulps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_mulps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_mulps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_mulps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_mulps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [3:0.50]
-; ZNVER1-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_mulps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; ZNVER1-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fmul <4 x float> %a0, %a1
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = fmul <4 x float> %1, %2
- ret <4 x float> %3
-}
-
-define float @test_mulss(float %a0, float %a1, float *%a2) {
-; GENERIC-LABEL: test_mulss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_mulss:
-; ATOM: # %bb.0:
-; ATOM-NEXT: mulss %xmm1, %xmm0 # sched: [4:4.00]
-; ATOM-NEXT: mulss (%rdi), %xmm0 # sched: [4:4.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_mulss:
-; SLM: # %bb.0:
-; SLM-NEXT: mulss %xmm1, %xmm0 # sched: [5:2.00]
-; SLM-NEXT: mulss (%rdi), %xmm0 # sched: [8:2.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_mulss:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_mulss:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_mulss:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [10:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_mulss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_mulss:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [3:0.50]
-; BROADWELL-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [8:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mulss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BROADWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_mulss:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mulss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_mulss:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_mulss:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_mulss:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_mulss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_mulss:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_mulss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_mulss:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [3:0.50]
-; ZNVER1-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [10:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_mulss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; ZNVER1-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fmul float %a0, %a1
- %2 = load float, float *%a2, align 4
- %3 = fmul float %1, %2
- ret float %3
-}
-
-define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_orps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_orps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: orps (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_orps:
-; SLM: # %bb.0:
-; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: orps (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_orps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_orps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_orps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_orps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_orps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: orps (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_orps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_orps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_orps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_orps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_orps:
-; SKX: # %bb.0:
-; SKX-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_orps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_orps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_orps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: orps (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_orps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_orps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: orps (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_orps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast <4 x float> %a0 to <4 x i32>
- %2 = bitcast <4 x float> %a1 to <4 x i32>
- %3 = or <4 x i32> %1, %2
- %4 = load <4 x float>, <4 x float> *%a2, align 16
- %5 = bitcast <4 x float> %4 to <4 x i32>
- %6 = or <4 x i32> %3, %5
- %7 = bitcast <4 x i32> %6 to <4 x float>
- ret <4 x float> %7
-}
-
-define void @test_prefetch(i8* %a0) optsize {
-; GENERIC-LABEL: test_prefetch:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
-; GENERIC-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
-; GENERIC-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
-; GENERIC-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_prefetch:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: prefetchnta (%rdi) # sched: [1:1.00]
-; ATOM-NEXT: prefetcht0 (%rdi) # sched: [1:1.00]
-; ATOM-NEXT: prefetcht1 (%rdi) # sched: [1:1.00]
-; ATOM-NEXT: prefetcht2 (%rdi) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_prefetch:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: prefetchnta (%rdi) # sched: [3:1.00]
-; SLM-NEXT: prefetcht0 (%rdi) # sched: [3:1.00]
-; SLM-NEXT: prefetcht1 (%rdi) # sched: [3:1.00]
-; SLM-NEXT: prefetcht2 (%rdi) # sched: [3:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_prefetch:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: #APP
-; SANDY-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
-; SANDY-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
-; SANDY-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
-; SANDY-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
-; SANDY-SSE-NEXT: #NO_APP
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_prefetch:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
-; SANDY-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
-; SANDY-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
-; SANDY-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_prefetch:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: #APP
-; HASWELL-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
-; HASWELL-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
-; HASWELL-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
-; HASWELL-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
-; HASWELL-SSE-NEXT: #NO_APP
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_prefetch:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
-; HASWELL-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
-; HASWELL-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
-; HASWELL-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_prefetch:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: #APP
-; BROADWELL-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: #NO_APP
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_prefetch:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
-; BROADWELL-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
-; BROADWELL-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
-; BROADWELL-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_prefetch:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: #APP
-; SKYLAKE-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
-; SKYLAKE-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
-; SKYLAKE-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
-; SKYLAKE-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
-; SKYLAKE-SSE-NEXT: #NO_APP
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_prefetch:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
-; SKYLAKE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
-; SKYLAKE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
-; SKYLAKE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_prefetch:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: #APP
-; SKX-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
-; SKX-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
-; SKX-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
-; SKX-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
-; SKX-SSE-NEXT: #NO_APP
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_prefetch:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
-; SKX-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
-; SKX-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
-; SKX-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_prefetch:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: #APP
-; BDVER2-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
-; BDVER2-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
-; BDVER2-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
-; BDVER2-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
-; BDVER2-SSE-NEXT: #NO_APP
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_prefetch:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
-; BDVER2-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
-; BDVER2-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
-; BDVER2-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_prefetch:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: #APP
-; BTVER2-SSE-NEXT: prefetchnta (%rdi) # sched: [3:1.00]
-; BTVER2-SSE-NEXT: prefetcht0 (%rdi) # sched: [3:1.00]
-; BTVER2-SSE-NEXT: prefetcht1 (%rdi) # sched: [3:1.00]
-; BTVER2-SSE-NEXT: prefetcht2 (%rdi) # sched: [3:1.00]
-; BTVER2-SSE-NEXT: #NO_APP
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_prefetch:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: prefetchnta (%rdi) # sched: [3:1.00]
-; BTVER2-NEXT: prefetcht0 (%rdi) # sched: [3:1.00]
-; BTVER2-NEXT: prefetcht1 (%rdi) # sched: [3:1.00]
-; BTVER2-NEXT: prefetcht2 (%rdi) # sched: [3:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_prefetch:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: #APP
-; ZNVER1-SSE-NEXT: prefetchnta (%rdi) # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: prefetcht0 (%rdi) # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: prefetcht1 (%rdi) # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: prefetcht2 (%rdi) # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: #NO_APP
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_prefetch:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: prefetchnta (%rdi) # sched: [8:0.50]
-; ZNVER1-NEXT: prefetcht0 (%rdi) # sched: [8:0.50]
-; ZNVER1-NEXT: prefetcht1 (%rdi) # sched: [8:0.50]
-; ZNVER1-NEXT: prefetcht2 (%rdi) # sched: [8:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void asm sideeffect "prefetchnta $0 \0A\09 prefetcht0 $0 \0A\09 prefetcht1 $0 \0A\09 prefetcht2 $0", "*m"(i8 *%a0)
- ret void
-}
-
-define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_rcpps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00]
-; GENERIC-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_rcpps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: rcpps (%rdi), %xmm1 # sched: [10:5.00]
-; ATOM-NEXT: rcpps %xmm0, %xmm0 # sched: [9:4.50]
-; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_rcpps:
-; SLM: # %bb.0:
-; SLM-NEXT: rcpps (%rdi), %xmm1 # sched: [8:1.00]
-; SLM-NEXT: rcpps %xmm0, %xmm0 # sched: [5:1.00]
-; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_rcpps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_rcpps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_rcpps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_rcpps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_rcpps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rcpps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_rcpps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [4:1.00]
-; SKYLAKE-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rcpps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_rcpps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rcpps:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00]
-; SKX-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_rcpps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_rcpps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00]
-; BDVER2-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_rcpps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_rcpps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [7:1.00]
-; BTVER2-NEXT: vrcpps %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_rcpps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:0.50]
-; ZNVER1-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [12:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_rcpps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vrcpps (%rdi), %xmm1 # sched: [12:0.50]
-; ZNVER1-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:0.50]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
- %2 = load <4 x float>, <4 x float> *%a1, align 16
- %3 = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %2)
- %4 = fadd <4 x float> %1, %3
- ret <4 x float> %4
-}
-declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
-
-; TODO - rcpss_m
-
-define <4 x float> @test_rcpss(float %a0, float *%a1) {
-; GENERIC-LABEL: test_rcpss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
-; GENERIC-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_rcpss:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
-; ATOM-NEXT: rcpss %xmm0, %xmm0 # sched: [4:4.00]
-; ATOM-NEXT: rcpss %xmm1, %xmm1 # sched: [4:4.00]
-; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_rcpss:
-; SLM: # %bb.0:
-; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
-; SLM-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00]
-; SLM-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00]
-; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_rcpss:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_rcpss:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_rcpss:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_rcpss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_rcpss:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; BROADWELL-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rcpss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; BROADWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_rcpss:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [4:1.00]
-; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKYLAKE-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [4:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rcpss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKYLAKE-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_rcpss:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [4:1.00]
-; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [4:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rcpss:
-; SKX: # %bb.0:
-; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_rcpss:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_rcpss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_rcpss:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_rcpss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_rcpss:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:0.50]
-; ZNVER1-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_rcpss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
-; ZNVER1-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:0.50]
-; ZNVER1-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:0.50]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = insertelement <4 x float> undef, float %a0, i32 0
- %2 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %1)
- %3 = load float, float *%a1, align 4
- %4 = insertelement <4 x float> undef, float %3, i32 0
- %5 = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
- %6 = fadd <4 x float> %2, %5
- ret <4 x float> %6
-}
-declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
-
-define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_rsqrtps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
-; GENERIC-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_rsqrtps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [10:5.00]
-; ATOM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [9:4.50]
-; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_rsqrtps:
-; SLM: # %bb.0:
-; SLM-NEXT: rsqrtps (%rdi), %xmm1 # sched: [8:1.00]
-; SLM-NEXT: rsqrtps %xmm0, %xmm0 # sched: [5:1.00]
-; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_rsqrtps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
-; SANDY-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_rsqrtps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_rsqrtps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_rsqrtps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_rsqrtps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rsqrtps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_rsqrtps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [4:1.00]
-; SKYLAKE-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rsqrtps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_rsqrtps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [4:1.00]
-; SKX-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rsqrtps:
-; SKX: # %bb.0:
-; SKX-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00]
-; SKX-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_rsqrtps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_rsqrtps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00]
-; BDVER2-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_rsqrtps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_rsqrtps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [7:1.00]
-; BTVER2-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_rsqrtps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:0.50]
-; ZNVER1-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [12:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_rsqrtps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [12:0.50]
-; ZNVER1-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:0.50]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
- %2 = load <4 x float>, <4 x float> *%a1, align 16
- %3 = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %2)
- %4 = fadd <4 x float> %1, %3
- ret <4 x float> %4
-}
-declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
-
-; TODO - rsqrtss_m
-
-define <4 x float> @test_rsqrtss(float %a0, float *%a1) {
-; GENERIC-LABEL: test_rsqrtss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
-; GENERIC-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_rsqrtss:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
-; ATOM-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:4.00]
-; ATOM-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:4.00]
-; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_rsqrtss:
-; SLM: # %bb.0:
-; SLM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [3:1.00]
-; SLM-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
-; SLM-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
-; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_rsqrtss:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_rsqrtss:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_rsqrtss:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_rsqrtss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_rsqrtss:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; BROADWELL-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_rsqrtss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; BROADWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_rsqrtss:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:1.00]
-; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKYLAKE-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_rsqrtss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKYLAKE-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_rsqrtss:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:1.00]
-; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_rsqrtss:
-; SKX: # %bb.0:
-; SKX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_rsqrtss:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_rsqrtss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_rsqrtss:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_rsqrtss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_rsqrtss:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:0.50]
-; ZNVER1-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_rsqrtss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
-; ZNVER1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:0.50]
-; ZNVER1-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:0.50]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = insertelement <4 x float> undef, float %a0, i32 0
- %2 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %1)
- %3 = load float, float *%a1, align 4
- %4 = insertelement <4 x float> undef, float %3, i32 0
- %5 = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
- %6 = fadd <4 x float> %2, %5
- ret <4 x float> %6
-}
-declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
-
-define void @test_sfence() {
-; GENERIC-LABEL: test_sfence:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: sfence # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sfence:
-; ATOM: # %bb.0:
-; ATOM-NEXT: sfence # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sfence:
-; SLM: # %bb.0:
-; SLM-NEXT: sfence # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_sfence:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: sfence # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_sfence:
-; SANDY: # %bb.0:
-; SANDY-NEXT: sfence # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_sfence:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: sfence # sched: [2:0.33]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_sfence:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: sfence # sched: [2:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_sfence:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: sfence # sched: [2:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sfence:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: sfence # sched: [2:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_sfence:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: sfence # sched: [2:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sfence:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: sfence # sched: [2:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_sfence:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: sfence # sched: [2:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sfence:
-; SKX: # %bb.0:
-; SKX-NEXT: sfence # sched: [2:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_sfence:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: sfence # sched: [1:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_sfence:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: sfence # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_sfence:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: sfence # sched: [1:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_sfence:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: sfence # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_sfence:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: sfence # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_sfence:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: sfence # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void @llvm.x86.sse.sfence()
- ret void
-}
-declare void @llvm.x86.sse.sfence() nounwind readnone
-
-define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) nounwind {
-; GENERIC-LABEL: test_shufps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; GENERIC-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_shufps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; ATOM-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [1:1.00]
-; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_shufps:
-; SLM: # %bb.0:
-; SLM-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; SLM-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [4:1.00]
-; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_shufps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; SANDY-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_shufps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; SANDY-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_shufps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; HASWELL-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_shufps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; HASWELL-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_shufps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_shufps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; BROADWELL-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_shufps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_shufps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; SKYLAKE-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_shufps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; SKX-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_shufps:
-; SKX: # %bb.0:
-; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
-; SKX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_shufps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [2:0.50]
-; BDVER2-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:0.50]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_shufps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [2:0.50]
-; BDVER2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:0.50]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_shufps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
-; BTVER2-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_shufps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
-; BTVER2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_shufps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_shufps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
-; ZNVER1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [8:0.50]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = shufflevector <4 x float> %a1, <4 x float> %2, <4 x i32> <i32 0, i32 3, i32 4, i32 4>
- %4 = fadd <4 x float> %1, %3
- ret <4 x float> %4
-}
-
-define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_sqrtps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:14.00]
-; GENERIC-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:14.00]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sqrtps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: sqrtps %xmm0, %xmm1 # sched: [70:35.00]
-; ATOM-NEXT: sqrtps (%rdi), %xmm0 # sched: [70:35.00]
-; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sqrtps:
-; SLM: # %bb.0:
-; SLM-NEXT: sqrtps (%rdi), %xmm1 # sched: [44:40.00]
-; SLM-NEXT: sqrtps %xmm0, %xmm0 # sched: [41:40.00]
-; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_sqrtps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:14.00]
-; SANDY-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:14.00]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_sqrtps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:14.00]
-; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:14.00]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_sqrtps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [11:7.00]
-; HASWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [17:7.00]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_sqrtps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [11:7.00]
-; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [17:7.00]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_sqrtps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [11:7.00]
-; BROADWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [16:7.00]
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sqrtps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [11:7.00]
-; BROADWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [16:7.00]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_sqrtps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [12:3.00]
-; SKYLAKE-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:3.00]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sqrtps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:3.00]
-; SKYLAKE-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:3.00]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_sqrtps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [12:3.00]
-; SKX-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:3.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sqrtps:
-; SKX: # %bb.0:
-; SKX-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:3.00]
-; SKX-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:3.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_sqrtps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [9:10.50]
-; BDVER2-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [14:10.50]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_sqrtps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [14:10.50]
-; BDVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [9:10.50]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_sqrtps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [21:21.00]
-; BTVER2-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [26:21.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_sqrtps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [26:21.00]
-; BTVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [21:21.00]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_sqrtps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [20:20.00]
-; ZNVER1-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [27:20.00]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_sqrtps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vsqrtps (%rdi), %xmm1 # sched: [27:20.00]
-; ZNVER1-NEXT: vsqrtps %xmm0, %xmm0 # sched: [20:20.00]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
- %2 = load <4 x float>, <4 x float> *%a1, align 16
- %3 = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %2)
- %4 = fadd <4 x float> %1, %3
- ret <4 x float> %4
-}
-declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
-
-; TODO - sqrtss_m
-
-define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_sqrtss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:14.00]
-; GENERIC-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
-; GENERIC-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:14.00]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sqrtss:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movaps (%rdi), %xmm1 # sched: [1:1.00]
-; ATOM-NEXT: sqrtss %xmm0, %xmm0 # sched: [34:17.00]
-; ATOM-NEXT: sqrtss %xmm1, %xmm1 # sched: [34:17.00]
-; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sqrtss:
-; SLM: # %bb.0:
-; SLM-NEXT: movaps (%rdi), %xmm1 # sched: [3:1.00]
-; SLM-NEXT: sqrtss %xmm0, %xmm0 # sched: [20:20.00]
-; SLM-NEXT: sqrtss %xmm1, %xmm1 # sched: [20:20.00]
-; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_sqrtss:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:14.00]
-; SANDY-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
-; SANDY-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:14.00]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_sqrtss:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00]
-; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
-; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:14.00]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_sqrtss:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [11:7.00]
-; HASWELL-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
-; HASWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [11:7.00]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_sqrtss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [11:7.00]
-; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
-; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [11:7.00]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_sqrtss:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [11:4.00]
-; BROADWELL-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [11:4.00]
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sqrtss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [11:4.00]
-; BROADWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:0.50]
-; BROADWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [11:4.00]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_sqrtss:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [12:3.00]
-; SKYLAKE-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [12:3.00]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sqrtss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00]
-; SKYLAKE-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
-; SKYLAKE-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:3.00]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_sqrtss:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [12:3.00]
-; SKX-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
-; SKX-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [12:3.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sqrtss:
-; SKX: # %bb.0:
-; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00]
-; SKX-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
-; SKX-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:3.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_sqrtss:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:0.50]
-; BDVER2-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [9:10.50]
-; BDVER2-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [9:10.50]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_sqrtss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:0.50]
-; BDVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [9:10.50]
-; BDVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [9:10.50]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_sqrtss:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:1.00]
-; BTVER2-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [21:21.00]
-; BTVER2-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [21:21.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_sqrtss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:1.00]
-; BTVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [21:21.00]
-; BTVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [21:21.00]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_sqrtss:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [20:20.00]
-; ZNVER1-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [20:20.00]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_sqrtss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovaps (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [20:20.00]
-; ZNVER1-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [20:20.00]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
- %2 = load <4 x float>, <4 x float> *%a1, align 16
- %3 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %2)
- %4 = fadd <4 x float> %1, %3
- ret <4 x float> %4
-}
-declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
-
-define i32 @test_stmxcsr() {
-; GENERIC-LABEL: test_stmxcsr:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
-; GENERIC-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_stmxcsr:
-; ATOM: # %bb.0:
-; ATOM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [15:7.50]
-; ATOM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_stmxcsr:
-; SLM: # %bb.0:
-; SLM-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; SLM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_stmxcsr:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
-; SANDY-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_stmxcsr:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
-; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_stmxcsr:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
-; HASWELL-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_stmxcsr:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
-; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_stmxcsr:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_stmxcsr:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
-; BROADWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_stmxcsr:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_stmxcsr:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
-; SKYLAKE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_stmxcsr:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
-; SKX-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_stmxcsr:
-; SKX: # %bb.0:
-; SKX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00]
-; SKX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_stmxcsr:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; BDVER2-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_stmxcsr:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; BDVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_stmxcsr:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; BTVER2-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_stmxcsr:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
-; BTVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_stmxcsr:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_stmxcsr:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [100:0.25]
-; ZNVER1-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = alloca i32, align 4
- %2 = bitcast i32* %1 to i8*
- call void @llvm.x86.sse.stmxcsr(i8* %2)
- %3 = load i32, i32* %1, align 4
- ret i32 %3
-}
-declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone
-
-define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_subps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_subps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: subps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: subps (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_subps:
-; SLM: # %bb.0:
-; SLM-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: subps (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_subps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_subps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_subps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_subps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_subps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: subps (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_subps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_subps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_subps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_subps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_subps:
-; SKX: # %bb.0:
-; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_subps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: subps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_subps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_subps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: subps (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_subps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_subps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_subps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fsub <4 x float> %a0, %a1
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = fsub <4 x float> %1, %2
- ret <4 x float> %3
-}
-
-define float @test_subss(float %a0, float %a1, float *%a2) {
-; GENERIC-LABEL: test_subss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_subss:
-; ATOM: # %bb.0:
-; ATOM-NEXT: subss %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: subss (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_subss:
-; SLM: # %bb.0:
-; SLM-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: subss (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_subss:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_subss:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_subss:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: subss (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_subss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_subss:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: subss (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_subss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_subss:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_subss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_subss:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_subss:
-; SKX: # %bb.0:
-; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_subss:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: subss %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: subss (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_subss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_subss:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: subss (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_subss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_subss:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: subss (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_subss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fsub float %a0, %a1
- %2 = load float, float *%a2, align 4
- %3 = fsub float %1, %2
- ret float %3
-}
-
-define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_ucomiss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: setnp %al # sched: [1:0.50]
-; GENERIC-NEXT: sete %cl # sched: [1:0.50]
-; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33]
-; GENERIC-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: setnp %al # sched: [1:0.50]
-; GENERIC-NEXT: sete %dl # sched: [1:0.50]
-; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33]
-; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33]
-; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_ucomiss:
-; ATOM: # %bb.0:
-; ATOM-NEXT: ucomiss %xmm1, %xmm0 # sched: [9:4.50]
-; ATOM-NEXT: setnp %al # sched: [1:0.50]
-; ATOM-NEXT: sete %cl # sched: [1:0.50]
-; ATOM-NEXT: andb %al, %cl # sched: [1:0.50]
-; ATOM-NEXT: ucomiss (%rdi), %xmm0 # sched: [10:5.00]
-; ATOM-NEXT: setnp %al # sched: [1:0.50]
-; ATOM-NEXT: sete %dl # sched: [1:0.50]
-; ATOM-NEXT: andb %al, %dl # sched: [1:0.50]
-; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50]
-; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_ucomiss:
-; SLM: # %bb.0:
-; SLM-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: setnp %al # sched: [1:0.50]
-; SLM-NEXT: sete %cl # sched: [1:0.50]
-; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
-; SLM-NEXT: ucomiss (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: setnp %al # sched: [1:0.50]
-; SLM-NEXT: sete %dl # sched: [1:0.50]
-; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
-; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
-; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_ucomiss:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50]
-; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
-; SANDY-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50]
-; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
-; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
-; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_ucomiss:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: setnp %al # sched: [1:0.50]
-; SANDY-NEXT: sete %cl # sched: [1:0.50]
-; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
-; SANDY-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-NEXT: setnp %al # sched: [1:0.50]
-; SANDY-NEXT: sete %dl # sched: [1:0.50]
-; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
-; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
-; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_ucomiss:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
-; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
-; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; HASWELL-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
-; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
-; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_ucomiss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: setnp %al # sched: [1:0.50]
-; HASWELL-NEXT: sete %cl # sched: [1:0.50]
-; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
-; HASWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: setnp %al # sched: [1:0.50]
-; HASWELL-NEXT: sete %dl # sched: [1:0.50]
-; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
-; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
-; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_ucomiss:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ucomiss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
-; BROADWELL-NEXT: sete %cl # sched: [1:0.50]
-; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25]
-; BROADWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
-; BROADWELL-NEXT: sete %dl # sched: [1:0.50]
-; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25]
-; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
-; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_ucomiss:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ucomiss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
-; SKYLAKE-NEXT: sete %cl # sched: [1:0.50]
-; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKYLAKE-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
-; SKYLAKE-NEXT: sete %dl # sched: [1:0.50]
-; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25]
-; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_ucomiss:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SKX-SSE-NEXT: sete %cl # sched: [1:0.50]
-; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKX-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SKX-SSE-NEXT: sete %dl # sched: [1:0.50]
-; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_ucomiss:
-; SKX: # %bb.0:
-; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: setnp %al # sched: [1:0.50]
-; SKX-NEXT: sete %cl # sched: [1:0.50]
-; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKX-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-NEXT: setnp %al # sched: [1:0.50]
-; SKX-NEXT: sete %dl # sched: [1:0.50]
-; SKX-NEXT: andb %al, %dl # sched: [1:0.25]
-; SKX-NEXT: orb %cl, %dl # sched: [1:0.25]
-; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_ucomiss:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [6:1.00]
-; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_ucomiss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-NEXT: setnp %al # sched: [1:0.50]
-; BDVER2-NEXT: sete %cl # sched: [1:0.50]
-; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50]
-; BDVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [6:1.00]
-; BDVER2-NEXT: setnp %al # sched: [1:0.50]
-; BDVER2-NEXT: sete %dl # sched: [1:0.50]
-; BDVER2-NEXT: andb %al, %dl # sched: [1:0.50]
-; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
-; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_ucomiss:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_ucomiss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: setnp %al # sched: [1:0.50]
-; BTVER2-NEXT: sete %cl # sched: [1:0.50]
-; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
-; BTVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: setnp %al # sched: [1:0.50]
-; BTVER2-NEXT: sete %dl # sched: [1:0.50]
-; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
-; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
-; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_ucomiss:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_ucomiss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
-; ZNVER1-NEXT: sete %cl # sched: [1:0.25]
-; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
-; ZNVER1-NEXT: vucomiss (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
-; ZNVER1-NEXT: sete %dl # sched: [1:0.25]
-; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25]
-; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25]
-; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1)
- %2 = load <4 x float>, <4 x float> *%a2, align 4
- %3 = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %2)
- %4 = or i32 %1, %3
- ret i32 %4
-}
-declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone
-
-define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_unpckhps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; GENERIC-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_unpckhps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; ATOM-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00]
-; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_unpckhps:
-; SLM: # %bb.0:
-; SLM-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SLM-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [4:1.00]
-; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_unpckhps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SANDY-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_unpckhps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SANDY-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_unpckhps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; HASWELL-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_unpckhps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; HASWELL-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_unpckhps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_unpckhps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_unpckhps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_unpckhps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_unpckhps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_unpckhps:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_unpckhps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50]
-; BDVER2-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_unpckhps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50]
-; BDVER2-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_unpckhps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; BTVER2-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_unpckhps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; BTVER2-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_unpckhps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_unpckhps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; ZNVER1-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = shufflevector <4 x float> %a1, <4 x float> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %4 = fadd <4 x float> %1, %3
- ret <4 x float> %4
-}
-
-define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_unpcklps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_unpcklps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; ATOM-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00]
-; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_unpcklps:
-; SLM: # %bb.0:
-; SLM-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SLM-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [4:1.00]
-; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_unpcklps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SANDY-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_unpcklps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SANDY-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_unpcklps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_unpcklps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_unpcklps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_unpcklps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_unpcklps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_unpcklps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_unpcklps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_unpcklps:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_unpcklps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [2:0.50]
-; BDVER2-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_unpcklps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [2:0.50]
-; BDVER2-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_unpcklps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
-; BTVER2-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_unpcklps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
-; BTVER2-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_unpcklps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_unpcklps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
-; ZNVER1-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = shufflevector <4 x float> %a1, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %4 = fadd <4 x float> %1, %3
- ret <4 x float> %4
-}
-
-define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_xorps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_xorps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: xorps (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_xorps:
-; SLM: # %bb.0:
-; SLM-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: xorps (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_xorps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_xorps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_xorps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_xorps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_xorps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xorps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_xorps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xorps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_xorps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xorps:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_xorps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_xorps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_xorps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_xorps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_xorps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_xorps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast <4 x float> %a0 to <4 x i32>
- %2 = bitcast <4 x float> %a1 to <4 x i32>
- %3 = xor <4 x i32> %1, %2
- %4 = load <4 x float>, <4 x float> *%a2, align 16
- %5 = bitcast <4 x float> %4 to <4 x i32>
- %6 = xor <4 x i32> %3, %5
- %7 = bitcast <4 x i32> %6 to <4 x float>
- ret <4 x float> %7
-}
-
-; 'WriteZero' and 'WriteNop' class instructions.
-
-define <4 x float> @test_fnop() nounwind {
-; GENERIC-LABEL: test_fnop:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: nop # sched: [1:0.25]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: xorps %xmm0, %xmm0 # sched: [0:0.25]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_fnop:
-; ATOM: # %bb.0:
-; ATOM-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_fnop:
-; SLM: # %bb.0:
-; SLM-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: #APP
-; SLM-NEXT: nop # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_fnop:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: #APP
-; SANDY-SSE-NEXT: nop # sched: [1:0.25]
-; SANDY-SSE-NEXT: #NO_APP
-; SANDY-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [0:0.25]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_fnop:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: nop # sched: [1:0.25]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [0:0.25]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_fnop:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: #APP
-; HASWELL-SSE-NEXT: nop # sched: [1:0.25]
-; HASWELL-SSE-NEXT: #NO_APP
-; HASWELL-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_fnop:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: nop # sched: [1:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_fnop:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: #APP
-; BROADWELL-SSE-NEXT: nop # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: #NO_APP
-; BROADWELL-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fnop:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: nop # sched: [1:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_fnop:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: #APP
-; SKYLAKE-SSE-NEXT: nop # sched: [1:0.17]
-; SKYLAKE-SSE-NEXT: #NO_APP
-; SKYLAKE-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_fnop:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: nop # sched: [1:0.17]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_fnop:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: #APP
-; SKX-SSE-NEXT: nop # sched: [1:0.17]
-; SKX-SSE-NEXT: #NO_APP
-; SKX-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_fnop:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: nop # sched: [1:0.17]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_fnop:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [0:0.25]
-; BDVER2-SSE-NEXT: #APP
-; BDVER2-SSE-NEXT: nop # sched: [1:0.50]
-; BDVER2-SSE-NEXT: #NO_APP
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_fnop:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [0:0.25]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: nop # sched: [1:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_fnop:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [0:0.50]
-; BTVER2-SSE-NEXT: #APP
-; BTVER2-SSE-NEXT: nop # sched: [1:0.50]
-; BTVER2-SSE-NEXT: #NO_APP
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_fnop:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [0:0.50]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: nop # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_fnop:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: #APP
-; ZNVER1-SSE-NEXT: nop # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: #NO_APP
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_fnop:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: nop # sched: [1:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void asm sideeffect "nop", ""() nounwind
- ret <4 x float> zeroinitializer
-}
-
-!0 = !{i32 1}
diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll
deleted file mode 100644
index ffb650f833e..00000000000
--- a/llvm/test/CodeGen/X86/sse2-schedule.ll
+++ /dev/null
@@ -1,16972 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2,-xop | FileCheck %s --check-prefixes=CHECK,BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
-
-define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_addpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_addpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: addpd (%rdi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_addpd:
-; SLM: # %bb.0:
-; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: addpd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_addpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_addpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_addpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_addpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_addpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_addpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_addpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_addpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_addpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_addpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_addpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_addpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_addpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_addpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_addpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_addpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fadd <2 x double> %a0, %a1
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = fadd <2 x double> %1, %2
- ret <2 x double> %3
-}
-
-define double @test_addsd(double %a0, double %a1, double *%a2) {
-; GENERIC-LABEL: test_addsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: addsd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_addsd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: addsd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: addsd (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_addsd:
-; SLM: # %bb.0:
-; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: addsd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_addsd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_addsd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_addsd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_addsd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_addsd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_addsd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_addsd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_addsd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_addsd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_addsd:
-; SKX: # %bb.0:
-; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_addsd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_addsd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_addsd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_addsd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_addsd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_addsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fadd double %a0, %a1
- %2 = load double, double *%a2, align 8
- %3 = fadd double %1, %2
- ret double %3
-}
-
-define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_andpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_andpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: andpd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_andpd:
-; SLM: # %bb.0:
-; SLM-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: andpd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_andpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_andpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_andpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_andpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_andpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_andpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_andpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_andpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_andpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_andpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_andpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_andpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_andpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_andpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_andpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_andpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast <2 x double> %a0 to <4 x i32>
- %2 = bitcast <2 x double> %a1 to <4 x i32>
- %3 = and <4 x i32> %1, %2
- %4 = load <2 x double>, <2 x double> *%a2, align 16
- %5 = bitcast <2 x double> %4 to <4 x i32>
- %6 = and <4 x i32> %3, %5
- %7 = bitcast <4 x i32> %6 to <2 x double>
- %8 = fadd <2 x double> %a1, %7
- ret <2 x double> %8
-}
-
-define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_andnotpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_andnotpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: andnpd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_andnotpd:
-; SLM: # %bb.0:
-; SLM-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: andnpd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_andnotpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_andnotpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_andnotpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_andnotpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_andnotpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_andnotpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_andnotpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_andnotpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_andnotpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_andnotpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_andnotpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_andnotpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_andnotpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_andnotpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_andnotpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_andnotpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast <2 x double> %a0 to <4 x i32>
- %2 = bitcast <2 x double> %a1 to <4 x i32>
- %3 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
- %4 = and <4 x i32> %3, %2
- %5 = load <2 x double>, <2 x double> *%a2, align 16
- %6 = bitcast <2 x double> %5 to <4 x i32>
- %7 = xor <4 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1>
- %8 = and <4 x i32> %6, %7
- %9 = bitcast <4 x i32> %8 to <2 x double>
- %10 = fadd <2 x double> %a1, %9
- ret <2 x double> %10
-}
-
-define void @test_clflush(i8* %p){
-; GENERIC-LABEL: test_clflush:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: clflush (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_clflush:
-; ATOM: # %bb.0:
-; ATOM-NEXT: clflush (%rdi) # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_clflush:
-; SLM: # %bb.0:
-; SLM-NEXT: clflush (%rdi) # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_clflush:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: clflush (%rdi) # sched: [5:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_clflush:
-; SANDY: # %bb.0:
-; SANDY-NEXT: clflush (%rdi) # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_clflush:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: clflush (%rdi) # sched: [2:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_clflush:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: clflush (%rdi) # sched: [2:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_clflush:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: clflush (%rdi) # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_clflush:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: clflush (%rdi) # sched: [2:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_clflush:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: clflush (%rdi) # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_clflush:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: clflush (%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_clflush:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: clflush (%rdi) # sched: [2:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_clflush:
-; SKX: # %bb.0:
-; SKX-NEXT: clflush (%rdi) # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_clflush:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: clflush (%rdi) # sched: [5:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_clflush:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: clflush (%rdi) # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_clflush:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: clflush (%rdi) # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_clflush:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: clflush (%rdi) # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_clflush:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: clflush (%rdi) # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_clflush:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: clflush (%rdi) # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void @llvm.x86.sse2.clflush(i8* %p)
- ret void
-}
-declare void @llvm.x86.sse2.clflush(i8*) nounwind
-
-define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_cmppd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmppd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmppd:
-; SLM: # %bb.0:
-; SLM-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cmppd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cmppd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cmppd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cmppd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cmppd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmppd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cmppd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmppd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cmppd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmppd:
-; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cmppd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [2:1.00]
-; BDVER2-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cmppd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
-; BDVER2-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cmppd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cmppd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cmppd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cmppd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fcmp oeq <2 x double> %a0, %a1
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = fcmp oeq <2 x double> %a0, %2
- %4 = or <2 x i1> %1, %3
- %5 = sext <2 x i1> %4 to <2 x i64>
- %6 = bitcast <2 x i64> %5 to <2 x double>
- ret <2 x double> %6
-}
-
-define double @test_cmpsd(double %a0, double %a1, double *%a2) {
-; GENERIC-LABEL: test_cmpsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cmpsd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cmpsd:
-; SLM: # %bb.0:
-; SLM-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cmpsd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cmpsd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cmpsd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cmpsd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cmpsd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cmpsd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cmpsd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cmpsd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cmpsd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cmpsd:
-; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cmpsd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [2:1.00]
-; BDVER2-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cmpsd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BDVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cmpsd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cmpsd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cmpsd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cmpsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = insertelement <2 x double> undef, double %a0, i32 0
- %2 = insertelement <2 x double> undef, double %a1, i32 0
- %3 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %1, <2 x double> %2, i8 0)
- %4 = load double, double *%a2, align 8
- %5 = insertelement <2 x double> undef, double %4, i32 0
- %6 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %3, <2 x double> %5, i8 0)
- %7 = extractelement <2 x double> %6, i32 0
- ret double %7
-}
-declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
-
-define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_comisd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: setnp %al # sched: [1:0.50]
-; GENERIC-NEXT: sete %cl # sched: [1:0.50]
-; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33]
-; GENERIC-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: setnp %al # sched: [1:0.50]
-; GENERIC-NEXT: sete %dl # sched: [1:0.50]
-; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33]
-; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33]
-; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_comisd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: comisd %xmm1, %xmm0 # sched: [9:4.50]
-; ATOM-NEXT: setnp %al # sched: [1:0.50]
-; ATOM-NEXT: sete %cl # sched: [1:0.50]
-; ATOM-NEXT: andb %al, %cl # sched: [1:0.50]
-; ATOM-NEXT: comisd (%rdi), %xmm0 # sched: [10:5.00]
-; ATOM-NEXT: setnp %al # sched: [1:0.50]
-; ATOM-NEXT: sete %dl # sched: [1:0.50]
-; ATOM-NEXT: andb %al, %dl # sched: [1:0.50]
-; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50]
-; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_comisd:
-; SLM: # %bb.0:
-; SLM-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: setnp %al # sched: [1:0.50]
-; SLM-NEXT: sete %cl # sched: [1:0.50]
-; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
-; SLM-NEXT: comisd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: setnp %al # sched: [1:0.50]
-; SLM-NEXT: sete %dl # sched: [1:0.50]
-; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
-; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
-; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_comisd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50]
-; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
-; SANDY-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50]
-; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
-; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
-; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_comisd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: setnp %al # sched: [1:0.50]
-; SANDY-NEXT: sete %cl # sched: [1:0.50]
-; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
-; SANDY-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-NEXT: setnp %al # sched: [1:0.50]
-; SANDY-NEXT: sete %dl # sched: [1:0.50]
-; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
-; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
-; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_comisd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
-; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
-; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; HASWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
-; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
-; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_comisd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: setnp %al # sched: [1:0.50]
-; HASWELL-NEXT: sete %cl # sched: [1:0.50]
-; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
-; HASWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: setnp %al # sched: [1:0.50]
-; HASWELL-NEXT: sete %dl # sched: [1:0.50]
-; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
-; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
-; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_comisd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_comisd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
-; BROADWELL-NEXT: sete %cl # sched: [1:0.50]
-; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25]
-; BROADWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
-; BROADWELL-NEXT: sete %dl # sched: [1:0.50]
-; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25]
-; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
-; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_comisd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_comisd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
-; SKYLAKE-NEXT: sete %cl # sched: [1:0.50]
-; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKYLAKE-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
-; SKYLAKE-NEXT: sete %dl # sched: [1:0.50]
-; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25]
-; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_comisd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SKX-SSE-NEXT: sete %cl # sched: [1:0.50]
-; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKX-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SKX-SSE-NEXT: sete %dl # sched: [1:0.50]
-; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_comisd:
-; SKX: # %bb.0:
-; SKX-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: setnp %al # sched: [1:0.50]
-; SKX-NEXT: sete %cl # sched: [1:0.50]
-; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKX-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-NEXT: setnp %al # sched: [1:0.50]
-; SKX-NEXT: sete %dl # sched: [1:0.50]
-; SKX-NEXT: andb %al, %dl # sched: [1:0.25]
-; SKX-NEXT: orb %cl, %dl # sched: [1:0.25]
-; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_comisd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [6:1.00]
-; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_comisd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-NEXT: setnp %al # sched: [1:0.50]
-; BDVER2-NEXT: sete %cl # sched: [1:0.50]
-; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50]
-; BDVER2-NEXT: vcomisd (%rdi), %xmm0 # sched: [6:1.00]
-; BDVER2-NEXT: setnp %al # sched: [1:0.50]
-; BDVER2-NEXT: sete %dl # sched: [1:0.50]
-; BDVER2-NEXT: andb %al, %dl # sched: [1:0.50]
-; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
-; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_comisd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_comisd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: setnp %al # sched: [1:0.50]
-; BTVER2-NEXT: sete %cl # sched: [1:0.50]
-; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
-; BTVER2-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: setnp %al # sched: [1:0.50]
-; BTVER2-NEXT: sete %dl # sched: [1:0.50]
-; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
-; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
-; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_comisd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_comisd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
-; ZNVER1-NEXT: sete %cl # sched: [1:0.25]
-; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
-; ZNVER1-NEXT: vcomisd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
-; ZNVER1-NEXT: sete %dl # sched: [1:0.25]
-; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25]
-; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25]
-; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1)
- %2 = load <2 x double>, <2 x double> *%a2, align 8
- %3 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %2)
- %4 = or i32 %1, %3
- ret i32 %4
-}
-declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
-; GENERIC-LABEL: test_cvtdq2pd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
-; GENERIC-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtdq2pd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtdq2pd (%rdi), %xmm1 # sched: [8:4.00]
-; ATOM-NEXT: cvtdq2pd %xmm0, %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtdq2pd:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtdq2pd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
-; SANDY-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
-; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtdq2pd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00]
-; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtdq2pd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
-; HASWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
-; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtdq2pd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00]
-; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtdq2pd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm1 # sched: [9:1.00]
-; BROADWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtdq2pd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [9:1.00]
-; BROADWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtdq2pd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtdq2pd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00]
-; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtdq2pd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtdq2pd:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtdq2pd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [8:1.00]
-; BDVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [13:1.00]
-; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtdq2pd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [13:1.00]
-; BDVER2-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [8:1.00]
-; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtdq2pd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtdq2pd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtdq2pd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtdq2pd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
- %2 = sitofp <2 x i32> %1 to <2 x double>
- %3 = load <4 x i32>, <4 x i32>*%a1, align 16
- %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
- %5 = sitofp <2 x i32> %4 to <2 x double>
- %6 = fadd <2 x double> %2, %5
- ret <2 x double> %6
-}
-
-define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) {
-; GENERIC-LABEL: test_cvtdq2ps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtdq2ps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtdq2ps (%rdi), %xmm1 # sched: [7:3.50]
-; ATOM-NEXT: cvtdq2ps %xmm0, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtdq2ps:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtdq2ps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtdq2ps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtdq2ps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtdq2ps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtdq2ps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtdq2ps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtdq2ps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtdq2ps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtdq2ps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtdq2ps:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtdq2ps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtdq2ps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00]
-; BDVER2-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtdq2ps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtdq2ps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtdq2ps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtdq2ps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sitofp <4 x i32> %a0 to <4 x float>
- %2 = load <4 x i32>, <4 x i32>*%a1, align 16
- %3 = sitofp <4 x i32> %2 to <4 x float>
- %4 = fadd <4 x float> %1, %3
- ret <4 x float> %4
-}
-
-define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_cvtpd2dq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
-; GENERIC-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
-; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtpd2dq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtpd2dq (%rdi), %xmm1 # sched: [8:4.00]
-; ATOM-NEXT: cvtpd2dq %xmm0, %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtpd2dq:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtpd2dq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
-; SANDY-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
-; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtpd2dq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [10:1.00]
-; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtpd2dq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
-; HASWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
-; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtpd2dq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
-; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtpd2dq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm1 # sched: [9:1.00]
-; BROADWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtpd2dq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
-; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtpd2dq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtpd2dq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
-; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtpd2dq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00]
-; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtpd2dq:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:0.50]
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtpd2dq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [8:1.00]
-; BDVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [13:1.00]
-; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtpd2dq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [13:1.00]
-; BDVER2-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [8:1.00]
-; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtpd2dq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtpd2dq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtpd2dq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtpd2dq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
- %2 = load <2 x double>, <2 x double> *%a1, align 16
- %3 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %2)
- %4 = add <4 x i32> %1, %3
- ret <4 x i32> %4
-}
-declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone
-
-define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_cvtpd2ps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
-; GENERIC-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtpd2ps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtpd2ps (%rdi), %xmm1 # sched: [8:4.00]
-; ATOM-NEXT: cvtpd2ps %xmm0, %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtpd2ps:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtpd2ps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
-; SANDY-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtpd2ps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [10:1.00]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtpd2ps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
-; HASWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtpd2ps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtpd2ps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm1 # sched: [9:1.00]
-; BROADWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtpd2ps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtpd2ps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtpd2ps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtpd2ps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtpd2ps:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtpd2ps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [8:1.00]
-; BDVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [13:1.00]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtpd2ps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [13:1.00]
-; BDVER2-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [8:1.00]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtpd2ps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtpd2ps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtpd2ps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtpd2ps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [11:1.00]
-; ZNVER1-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0)
- %2 = load <2 x double>, <2 x double> *%a1, align 16
- %3 = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %2)
- %4 = fadd <4 x float> %1, %3
- ret <4 x float> %4
-}
-declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone
-
-define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_cvtps2dq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtps2dq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtps2dq (%rdi), %xmm1 # sched: [7:3.50]
-; ATOM-NEXT: cvtps2dq %xmm0, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtps2dq:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtps2dq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtps2dq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00]
-; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtps2dq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtps2dq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00]
-; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtps2dq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtps2dq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00]
-; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtps2dq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtps2dq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50]
-; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtps2dq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtps2dq:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50]
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtps2dq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtps2dq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00]
-; BDVER2-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtps2dq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtps2dq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtps2dq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtps2dq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0)
- %2 = load <4 x float>, <4 x float> *%a1, align 16
- %3 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %2)
- %4 = add <4 x i32> %1, %3
- ret <4 x i32> %4
-}
-declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone
-
-define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_cvtps2pd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
-; GENERIC-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtps2pd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtps2pd (%rdi), %xmm1 # sched: [8:4.00]
-; ATOM-NEXT: cvtps2pd %xmm0, %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtps2pd:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtps2pd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
-; SANDY-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtps2pd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00]
-; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtps2pd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
-; HASWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00]
-; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtps2pd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [6:1.00]
-; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtps2pd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtps2pd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [6:1.00]
-; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtps2pd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtps2pd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50]
-; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtps2pd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtps2pd:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtps2pd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [8:1.00]
-; BDVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [13:1.00]
-; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtps2pd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [13:1.00]
-; BDVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [8:1.00]
-; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtps2pd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtps2pd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00]
-; BTVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtps2pd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtps2pd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [10:1.00]
-; ZNVER1-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1>
- %2 = fpext <2 x float> %1 to <2 x double>
- %3 = load <4 x float>, <4 x float> *%a1, align 16
- %4 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 0, i32 1>
- %5 = fpext <2 x float> %4 to <2 x double>
- %6 = fadd <2 x double> %2, %5
- ret <2 x double> %6
-}
-
-define i32 @test_cvtsd2si(double %a0, double *%a1) {
-; GENERIC-LABEL: test_cvtsd2si:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00]
-; GENERIC-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtsd2si:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtsd2si (%rdi), %eax # sched: [9:4.50]
-; ATOM-NEXT: cvtsd2si %xmm0, %ecx # sched: [8:4.00]
-; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtsd2si:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtsd2si (%rdi), %eax # sched: [7:1.00]
-; SLM-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:0.50]
-; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtsd2si:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00]
-; SANDY-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00]
-; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtsd2si:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00]
-; SANDY-NEXT: vcvtsd2si (%rdi), %eax # sched: [10:1.00]
-; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtsd2si:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00]
-; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtsd2si:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [9:1.00]
-; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtsd2si:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00]
-; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtsd2si:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [9:1.00]
-; BROADWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00]
-; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtsd2si:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtsd2si:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00]
-; SKYLAKE-NEXT: vcvtsd2si (%rdi), %eax # sched: [11:1.00]
-; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtsd2si:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00]
-; SKX-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00]
-; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtsd2si:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00]
-; SKX-NEXT: vcvtsd2si (%rdi), %eax # sched: [11:1.00]
-; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtsd2si:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [18:1.00]
-; BDVER2-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [13:1.00]
-; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtsd2si:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [18:1.00]
-; BDVER2-NEXT: vcvtsd2si %xmm0, %ecx # sched: [13:1.00]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtsd2si:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [12:1.00]
-; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [7:1.00]
-; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtsd2si:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [12:1.00]
-; BTVER2-NEXT: vcvtsd2si %xmm0, %ecx # sched: [7:1.00]
-; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtsd2si:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtsd2si:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtsd2si (%rdi), %eax # sched: [12:1.00]
-; ZNVER1-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00]
-; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = insertelement <2 x double> undef, double %a0, i32 0
- %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %1)
- %3 = load double, double *%a1, align 8
- %4 = insertelement <2 x double> undef, double %3, i32 0
- %5 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %4)
- %6 = add i32 %2, %5
- ret i32 %6
-}
-declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
-
-define i64 @test_cvtsd2siq(double %a0, double *%a1) {
-; GENERIC-LABEL: test_cvtsd2siq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00]
-; GENERIC-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtsd2siq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtsd2si (%rdi), %rax # sched: [9:4.50]
-; ATOM-NEXT: cvtsd2si %xmm0, %rcx # sched: [8:4.00]
-; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtsd2siq:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtsd2si (%rdi), %rax # sched: [7:1.00]
-; SLM-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:0.50]
-; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtsd2siq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00]
-; SANDY-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00]
-; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtsd2siq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00]
-; SANDY-NEXT: vcvtsd2si (%rdi), %rax # sched: [10:1.00]
-; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtsd2siq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00]
-; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtsd2siq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [9:1.00]
-; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtsd2siq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00]
-; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtsd2siq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [9:1.00]
-; BROADWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00]
-; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtsd2siq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtsd2siq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00]
-; SKYLAKE-NEXT: vcvtsd2si (%rdi), %rax # sched: [11:1.00]
-; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtsd2siq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00]
-; SKX-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00]
-; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtsd2siq:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00]
-; SKX-NEXT: vcvtsd2si (%rdi), %rax # sched: [11:1.00]
-; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtsd2siq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [18:1.00]
-; BDVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [13:1.00]
-; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtsd2siq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [18:1.00]
-; BDVER2-NEXT: vcvtsd2si %xmm0, %rcx # sched: [13:1.00]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtsd2siq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [12:1.00]
-; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [7:1.00]
-; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtsd2siq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [12:1.00]
-; BTVER2-NEXT: vcvtsd2si %xmm0, %rcx # sched: [7:1.00]
-; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtsd2siq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtsd2siq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtsd2si (%rdi), %rax # sched: [12:1.00]
-; ZNVER1-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00]
-; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = insertelement <2 x double> undef, double %a0, i32 0
- %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %1)
- %3 = load double, double *%a1, align 8
- %4 = insertelement <2 x double> undef, double %3, i32 0
- %5 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %4)
- %6 = add i64 %2, %5
- ret i64 %6
-}
-declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone
-
-define float @test_cvtsd2ss(double %a0, double *%a1) {
-; GENERIC-LABEL: test_cvtsd2ss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
-; GENERIC-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
-; GENERIC-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
-; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtsd2ss:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00]
-; ATOM-NEXT: cvtsd2ss %xmm0, %xmm2 # sched: [6:3.00]
-; ATOM-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: cvtsd2ss %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: addss %xmm2, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtsd2ss:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [3:1.00]
-; SLM-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:0.50]
-; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtsd2ss:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
-; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
-; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtsd2ss:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
-; SANDY-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtsd2ss:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
-; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtsd2ss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; HASWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtsd2ss:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtsd2ss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; BROADWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtsd2ss:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtsd2ss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; SKYLAKE-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtsd2ss:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtsd2ss:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; SKX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtsd2ss:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; BDVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtsd2ss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; BDVER2-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtsd2ss:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [7:2.00]
-; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
-; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [7:2.00]
-; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtsd2ss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [7:2.00]
-; BTVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
-; BTVER2-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [7:2.00]
-; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtsd2ss:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtsd2ss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
-; ZNVER1-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fptrunc double %a0 to float
- %2 = load double, double *%a1, align 8
- %3 = fptrunc double %2 to float
- %4 = fadd float %1, %3
- ret float %4
-}
-
-define double @test_cvtsi2sd(i32 %a0, i32 *%a1) {
-; GENERIC-LABEL: test_cvtsi2sd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
-; GENERIC-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtsi2sd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT: addsd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtsi2sd:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:0.50]
-; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtsi2sd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
-; SANDY-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtsi2sd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtsi2sd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
-; HASWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtsi2sd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtsi2sd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtsi2sd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtsi2sd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtsi2sd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtsi2sd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtsi2sd:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtsi2sd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [14:1.00]
-; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtsi2sd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [14:1.00]
-; BDVER2-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtsi2sd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
-; BTVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [10:1.00]
-; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtsi2sd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [10:1.00]
-; BTVER2-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtsi2sd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtsi2sd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sitofp i32 %a0 to double
- %2 = load i32, i32 *%a1, align 8
- %3 = sitofp i32 %2 to double
- %4 = fadd double %1, %3
- ret double %4
-}
-
-define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) {
-; GENERIC-LABEL: test_cvtsi2sdq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
-; GENERIC-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtsi2sdq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT: addsd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtsi2sdq:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:0.50]
-; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtsi2sdq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
-; SANDY-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtsi2sdq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtsi2sdq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
-; HASWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtsi2sdq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtsi2sdq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtsi2sdq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtsi2sdq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtsi2sdq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtsi2sdq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtsi2sdq:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtsi2sdq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [13:1.00]
-; BDVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtsi2sdq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [14:1.00]
-; BDVER2-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtsi2sdq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
-; BTVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [10:1.00]
-; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtsi2sdq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [10:1.00]
-; BTVER2-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00]
-; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtsi2sdq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtsi2sdq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sitofp i64 %a0 to double
- %2 = load i64, i64 *%a1, align 8
- %3 = sitofp i64 %2 to double
- %4 = fadd double %1, %3
- ret double %4
-}
-
-; TODO - cvtss2sd_m
-
-define double @test_cvtss2sd(float %a0, float *%a1) {
-; GENERIC-LABEL: test_cvtss2sd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
-; GENERIC-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvtss2sd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
-; ATOM-NEXT: cvtss2sd %xmm0, %xmm2 # sched: [6:3.00]
-; ATOM-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: cvtss2sd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: addsd %xmm2, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvtss2sd:
-; SLM: # %bb.0:
-; SLM-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00]
-; SLM-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:0.50]
-; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvtss2sd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00]
-; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvtss2sd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [1:1.00]
-; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvtss2sd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00]
-; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvtss2sd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00]
-; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvtss2sd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvtss2sd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; BROADWELL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00]
-; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvtss2sd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvtss2sd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKYLAKE-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvtss2sd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvtss2sd:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
-; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvtss2sd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvtss2sd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvtss2sd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [7:2.00]
-; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [7:2.00]
-; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvtss2sd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [7:2.00]
-; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [7:2.00]
-; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvtss2sd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvtss2sd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
-; ZNVER1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [4:1.00]
-; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fpext float %a0 to double
- %2 = load float, float *%a1, align 4
- %3 = fpext float %2 to double
- %4 = fadd double %1, %3
- ret double %4
-}
-
-define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_cvttpd2dq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
-; GENERIC-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
-; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvttpd2dq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvttpd2dq (%rdi), %xmm1 # sched: [8:4.00]
-; ATOM-NEXT: cvttpd2dq %xmm0, %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvttpd2dq:
-; SLM: # %bb.0:
-; SLM-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvttpd2dq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
-; SANDY-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
-; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvttpd2dq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; SANDY-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [10:1.00]
-; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvttpd2dq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
-; HASWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
-; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvttpd2dq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; HASWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
-; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvttpd2dq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm1 # sched: [9:1.00]
-; BROADWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvttpd2dq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
-; BROADWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
-; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvttpd2dq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
-; SKYLAKE-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvttpd2dq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
-; SKYLAKE-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
-; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvttpd2dq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
-; SKX-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00]
-; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvttpd2dq:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
-; SKX-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:0.50]
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvttpd2dq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [8:1.00]
-; BDVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [13:1.00]
-; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvttpd2dq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [13:1.00]
-; BDVER2-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [8:1.00]
-; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvttpd2dq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvttpd2dq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvttpd2dq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvttpd2dq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fptosi <2 x double> %a0 to <2 x i32>
- %2 = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = load <2 x double>, <2 x double> *%a1, align 16
- %4 = fptosi <2 x double> %3 to <2 x i32>
- %5 = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %6 = add <4 x i32> %2, %5
- ret <4 x i32> %6
-}
-
-define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_cvttps2dq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvttps2dq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvttps2dq (%rdi), %xmm1 # sched: [7:3.50]
-; ATOM-NEXT: cvttps2dq %xmm0, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvttps2dq:
-; SLM: # %bb.0:
-; SLM-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
-; SLM-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvttps2dq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvttps2dq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00]
-; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvttps2dq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvttps2dq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00]
-; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvttps2dq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvttps2dq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00]
-; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvttps2dq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvttps2dq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50]
-; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvttps2dq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvttps2dq:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50]
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvttps2dq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvttps2dq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00]
-; BDVER2-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvttps2dq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvttps2dq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvttps2dq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvttps2dq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [12:1.00]
-; ZNVER1-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [5:1.00]
-; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fptosi <4 x float> %a0 to <4 x i32>
- %2 = load <4 x float>, <4 x float> *%a1, align 16
- %3 = fptosi <4 x float> %2 to <4 x i32>
- %4 = add <4 x i32> %1, %3
- ret <4 x i32> %4
-}
-
-define i32 @test_cvttsd2si(double %a0, double *%a1) {
-; GENERIC-LABEL: test_cvttsd2si:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00]
-; GENERIC-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvttsd2si:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvttsd2si (%rdi), %eax # sched: [9:4.50]
-; ATOM-NEXT: cvttsd2si %xmm0, %ecx # sched: [8:4.00]
-; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvttsd2si:
-; SLM: # %bb.0:
-; SLM-NEXT: cvttsd2si (%rdi), %eax # sched: [7:1.00]
-; SLM-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:0.50]
-; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvttsd2si:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00]
-; SANDY-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00]
-; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvttsd2si:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00]
-; SANDY-NEXT: vcvttsd2si (%rdi), %eax # sched: [10:1.00]
-; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvttsd2si:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00]
-; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvttsd2si:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00]
-; HASWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [9:1.00]
-; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvttsd2si:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00]
-; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvttsd2si:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [9:1.00]
-; BROADWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00]
-; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvttsd2si:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvttsd2si:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00]
-; SKYLAKE-NEXT: vcvttsd2si (%rdi), %eax # sched: [11:1.00]
-; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvttsd2si:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00]
-; SKX-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00]
-; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvttsd2si:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00]
-; SKX-NEXT: vcvttsd2si (%rdi), %eax # sched: [11:1.00]
-; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvttsd2si:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [18:1.00]
-; BDVER2-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [13:1.00]
-; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvttsd2si:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [18:1.00]
-; BDVER2-NEXT: vcvttsd2si %xmm0, %ecx # sched: [13:1.00]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvttsd2si:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [12:1.00]
-; BTVER2-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [7:1.00]
-; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvttsd2si:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [12:1.00]
-; BTVER2-NEXT: vcvttsd2si %xmm0, %ecx # sched: [7:1.00]
-; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvttsd2si:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvttsd2si:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvttsd2si (%rdi), %eax # sched: [12:1.00]
-; ZNVER1-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00]
-; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fptosi double %a0 to i32
- %2 = load double, double *%a1, align 8
- %3 = fptosi double %2 to i32
- %4 = add i32 %1, %3
- ret i32 %4
-}
-
-define i64 @test_cvttsd2siq(double %a0, double *%a1) {
-; GENERIC-LABEL: test_cvttsd2siq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00]
-; GENERIC-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_cvttsd2siq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: cvttsd2si (%rdi), %rax # sched: [9:4.50]
-; ATOM-NEXT: cvttsd2si %xmm0, %rcx # sched: [8:4.00]
-; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_cvttsd2siq:
-; SLM: # %bb.0:
-; SLM-NEXT: cvttsd2si (%rdi), %rax # sched: [7:1.00]
-; SLM-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:0.50]
-; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_cvttsd2siq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00]
-; SANDY-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00]
-; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_cvttsd2siq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00]
-; SANDY-NEXT: vcvttsd2si (%rdi), %rax # sched: [10:1.00]
-; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_cvttsd2siq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00]
-; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_cvttsd2siq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00]
-; HASWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [9:1.00]
-; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_cvttsd2siq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00]
-; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:1.00]
-; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_cvttsd2siq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [9:1.00]
-; BROADWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00]
-; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_cvttsd2siq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [11:1.00]
-; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_cvttsd2siq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00]
-; SKYLAKE-NEXT: vcvttsd2si (%rdi), %rax # sched: [11:1.00]
-; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_cvttsd2siq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [6:1.00]
-; SKX-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [11:1.00]
-; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_cvttsd2siq:
-; SKX: # %bb.0:
-; SKX-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00]
-; SKX-NEXT: vcvttsd2si (%rdi), %rax # sched: [11:1.00]
-; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_cvttsd2siq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [18:1.00]
-; BDVER2-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [13:1.00]
-; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_cvttsd2siq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [18:1.00]
-; BDVER2-NEXT: vcvttsd2si %xmm0, %rcx # sched: [13:1.00]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_cvttsd2siq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [12:1.00]
-; BTVER2-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [7:1.00]
-; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_cvttsd2siq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [12:1.00]
-; BTVER2-NEXT: vcvttsd2si %xmm0, %rcx # sched: [7:1.00]
-; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_cvttsd2siq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [12:1.00]
-; ZNVER1-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00]
-; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_cvttsd2siq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vcvttsd2si (%rdi), %rax # sched: [12:1.00]
-; ZNVER1-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00]
-; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fptosi double %a0 to i64
- %2 = load double, double *%a1, align 8
- %3 = fptosi double %2 to i64
- %4 = add i64 %1, %3
- ret i64 %4
-}
-
-define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_divpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: divpd %xmm1, %xmm0 # sched: [22:22.00]
-; GENERIC-NEXT: divpd (%rdi), %xmm0 # sched: [28:22.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_divpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: divpd %xmm1, %xmm0 # sched: [125:62.50]
-; ATOM-NEXT: divpd (%rdi), %xmm0 # sched: [125:62.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_divpd:
-; SLM: # %bb.0:
-; SLM-NEXT: divpd %xmm1, %xmm0 # sched: [69:69.00]
-; SLM-NEXT: divpd (%rdi), %xmm0 # sched: [72:69.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_divpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [22:22.00]
-; SANDY-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [28:22.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_divpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:22.00]
-; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:22.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_divpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [20:14.00]
-; HASWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [26:14.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_divpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [20:14.00]
-; HASWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [26:14.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_divpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:8.00]
-; BROADWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [19:8.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_divpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:8.00]
-; BROADWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [19:8.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_divpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:3.00]
-; SKYLAKE-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:4.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_divpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:3.00]
-; SKYLAKE-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:4.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_divpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:3.00]
-; SKX-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:4.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_divpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:3.00]
-; SKX-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:4.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_divpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [9:9.50]
-; BDVER2-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [14:9.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_divpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [9:9.50]
-; BDVER2-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [14:9.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_divpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [19:19.00]
-; BTVER2-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [24:19.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_divpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
-; BTVER2-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_divpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [15:1.00]
-; ZNVER1-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [22:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_divpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
-; ZNVER1-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fdiv <2 x double> %a0, %a1
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = fdiv <2 x double> %1, %2
- ret <2 x double> %3
-}
-
-define double @test_divsd(double %a0, double %a1, double *%a2) {
-; GENERIC-LABEL: test_divsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: divsd %xmm1, %xmm0 # sched: [22:22.00]
-; GENERIC-NEXT: divsd (%rdi), %xmm0 # sched: [28:22.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_divsd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: divsd %xmm1, %xmm0 # sched: [62:31.00]
-; ATOM-NEXT: divsd (%rdi), %xmm0 # sched: [62:31.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_divsd:
-; SLM: # %bb.0:
-; SLM-NEXT: divsd %xmm1, %xmm0 # sched: [34:32.00]
-; SLM-NEXT: divsd (%rdi), %xmm0 # sched: [37:32.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_divsd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [22:22.00]
-; SANDY-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [28:22.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_divsd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:22.00]
-; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:22.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_divsd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [20:14.00]
-; HASWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [25:14.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_divsd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [20:14.00]
-; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [25:14.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_divsd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:4.00]
-; BROADWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:8.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_divsd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:4.00]
-; BROADWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:8.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_divsd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:3.00]
-; SKYLAKE-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:4.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_divsd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:3.00]
-; SKYLAKE-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:4.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_divsd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:3.00]
-; SKX-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:4.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_divsd:
-; SKX: # %bb.0:
-; SKX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:3.00]
-; SKX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:4.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_divsd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [9:9.50]
-; BDVER2-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [14:9.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_divsd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [9:9.50]
-; BDVER2-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [14:9.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_divsd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [19:19.00]
-; BTVER2-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [24:19.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_divsd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
-; BTVER2-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_divsd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [15:1.00]
-; ZNVER1-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [22:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_divsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
-; ZNVER1-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [22:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fdiv double %a0, %a1
- %2 = load double, double *%a2, align 8
- %3 = fdiv double %1, %2
- ret double %3
-}
-
-define void @test_lfence() {
-; GENERIC-LABEL: test_lfence:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: lfence # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lfence:
-; ATOM: # %bb.0:
-; ATOM-NEXT: lfence # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lfence:
-; SLM: # %bb.0:
-; SLM-NEXT: lfence # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_lfence:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: lfence # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_lfence:
-; SANDY: # %bb.0:
-; SANDY-NEXT: lfence # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_lfence:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: lfence # sched: [2:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_lfence:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: lfence # sched: [2:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_lfence:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: lfence # sched: [2:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lfence:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: lfence # sched: [2:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_lfence:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: lfence # sched: [2:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lfence:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: lfence # sched: [2:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_lfence:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: lfence # sched: [2:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_lfence:
-; SKX: # %bb.0:
-; SKX-NEXT: lfence # sched: [2:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_lfence:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: lfence # sched: [1:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_lfence:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: lfence # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_lfence:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: lfence # sched: [1:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_lfence:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: lfence # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_lfence:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: lfence # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_lfence:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: lfence # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void @llvm.x86.sse2.lfence()
- ret void
-}
-declare void @llvm.x86.sse2.lfence() nounwind readnone
-
-define void @test_mfence() {
-; GENERIC-LABEL: test_mfence:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: mfence # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_mfence:
-; ATOM: # %bb.0:
-; ATOM-NEXT: mfence # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_mfence:
-; SLM: # %bb.0:
-; SLM-NEXT: mfence # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_mfence:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: mfence # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_mfence:
-; SANDY: # %bb.0:
-; SANDY-NEXT: mfence # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_mfence:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: mfence # sched: [2:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_mfence:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: mfence # sched: [2:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_mfence:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: mfence # sched: [2:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mfence:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: mfence # sched: [2:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_mfence:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: mfence # sched: [3:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mfence:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: mfence # sched: [3:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_mfence:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: mfence # sched: [3:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_mfence:
-; SKX: # %bb.0:
-; SKX-NEXT: mfence # sched: [3:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_mfence:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: mfence # sched: [1:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_mfence:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: mfence # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_mfence:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: mfence # sched: [1:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_mfence:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: mfence # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_mfence:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: mfence # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_mfence:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: mfence # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void @llvm.x86.sse2.mfence()
- ret void
-}
-declare void @llvm.x86.sse2.mfence() nounwind readnone
-
-define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) {
-; GENERIC-LABEL: test_maskmovdqu:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_maskmovdqu:
-; ATOM: # %bb.0:
-; ATOM-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_maskmovdqu:
-; SLM: # %bb.0:
-; SLM-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_maskmovdqu:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_maskmovdqu:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_maskmovdqu:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_maskmovdqu:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_maskmovdqu:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_maskmovdqu:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_maskmovdqu:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_maskmovdqu:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_maskmovdqu:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_maskmovdqu:
-; SKX: # %bb.0:
-; SKX-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_maskmovdqu:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_maskmovdqu:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_maskmovdqu:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_maskmovdqu:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_maskmovdqu:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_maskmovdqu:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2)
- ret void
-}
-declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind
-
-define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_maxpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_maxpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: maxpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: maxpd (%rdi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_maxpd:
-; SLM: # %bb.0:
-; SLM-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: maxpd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_maxpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_maxpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_maxpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_maxpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_maxpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_maxpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_maxpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_maxpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_maxpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_maxpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_maxpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [2:1.00]
-; BDVER2-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_maxpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BDVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_maxpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_maxpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_maxpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_maxpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1)
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %1, <2 x double> %2)
- ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_maxsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: maxsd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_maxsd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: maxsd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: maxsd (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_maxsd:
-; SLM: # %bb.0:
-; SLM-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: maxsd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_maxsd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_maxsd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_maxsd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_maxsd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_maxsd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_maxsd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_maxsd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_maxsd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_maxsd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_maxsd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_maxsd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [2:1.00]
-; BDVER2-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_maxsd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BDVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_maxsd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_maxsd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_maxsd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_maxsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1)
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %1, <2 x double> %2)
- ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_minpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_minpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: minpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: minpd (%rdi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_minpd:
-; SLM: # %bb.0:
-; SLM-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: minpd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_minpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_minpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_minpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_minpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_minpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_minpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_minpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_minpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_minpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_minpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_minpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [2:1.00]
-; BDVER2-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_minpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BDVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_minpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_minpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_minpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_minpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1)
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %1, <2 x double> %2)
- ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_minsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: minsd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_minsd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: minsd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: minsd (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_minsd:
-; SLM: # %bb.0:
-; SLM-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: minsd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_minsd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_minsd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_minsd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_minsd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_minsd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_minsd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_minsd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_minsd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_minsd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_minsd:
-; SKX: # %bb.0:
-; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_minsd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [2:1.00]
-; BDVER2-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [7:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_minsd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BDVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_minsd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_minsd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_minsd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_minsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1)
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %1, <2 x double> %2)
- ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
-
-define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_movapd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movapd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movapd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: addpd %xmm0, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movapd:
-; SLM: # %bb.0:
-; SLM-NEXT: movapd (%rdi), %xmm0 # sched: [3:1.00]
-; SLM-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movapd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movapd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movapd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movapd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movapd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movapd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movapd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movapd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movapd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movapd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movapd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movapd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movapd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movapd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movapd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movapd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovapd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <2 x double>, <2 x double> *%a0, align 16
- %2 = fadd <2 x double> %1, %1
- store <2 x double> %2, <2 x double> *%a1, align 16
- ret void
-}
-
-define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_movdqa:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movdqa:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movdqa (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: paddq %xmm0, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movdqa:
-; SLM: # %bb.0:
-; SLM-NEXT: movdqa (%rdi), %xmm0 # sched: [3:1.00]
-; SLM-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movdqa:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movdqa:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movdqa:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movdqa:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movdqa:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movdqa:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movdqa:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movdqa:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movdqa:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movdqa:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movdqa:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movdqa:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movdqa:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movdqa:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movdqa:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movdqa:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovdqa (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <2 x i64>, <2 x i64> *%a0, align 16
- %2 = add <2 x i64> %1, %1
- store <2 x i64> %2, <2 x i64> *%a1, align 16
- ret void
-}
-
-define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_movdqu:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movdqu:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movdqu (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT: paddq %xmm0, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: movdqu %xmm0, (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movdqu:
-; SLM: # %bb.0:
-; SLM-NEXT: movdqu (%rdi), %xmm0 # sched: [3:1.00]
-; SLM-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movdqu:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movdqu:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movdqu:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movdqu:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movdqu:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movdqu:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movdqu:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movdqu:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movdqu:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movdqu:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movdqu:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movdqu:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movdqu:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movdqu:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movdqu:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movdqu:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovdqu (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <2 x i64>, <2 x i64> *%a0, align 1
- %2 = add <2 x i64> %1, %1
- store <2 x i64> %2, <2 x i64> *%a1, align 1
- ret void
-}
-
-define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: test_movd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
-; GENERIC-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
-; GENERIC-NEXT: movd %xmm2, %eax # sched: [2:1.00]
-; GENERIC-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00]
-; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: movd %xmm1, %eax # sched: [3:3.00]
-; ATOM-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
-; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movd:
-; SLM: # %bb.0:
-; SLM-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [3:1.00]
-; SLM-NEXT: movd %edi, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
-; SLM-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
-; SLM-NEXT: movd %xmm2, %eax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
-; SANDY-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
-; SANDY-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00]
-; SANDY-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
-; SANDY-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
-; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
-; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: movd %xmm2, %eax # sched: [1:1.00]
-; HASWELL-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
-; HASWELL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vmovd %xmm0, %eax # sched: [1:1.00]
-; HASWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: movd %xmm2, %eax # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
-; BROADWELL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vmovd %xmm0, %eax # sched: [1:1.00]
-; BROADWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
-; SKYLAKE-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
-; SKYLAKE-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
-; SKX-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.33]
-; SKX-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00]
-; SKX-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
-; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
-; SKX-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [10:0.50]
-; BDVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: movd %xmm2, %eax # sched: [10:1.00]
-; BDVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovd %edi, %xmm1 # sched: [10:0.50]
-; BDVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
-; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vmovd %xmm0, %eax # sched: [10:1.00]
-; BDVER2-NEXT: vmovd %xmm1, (%rsi) # sched: [2:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [8:0.50]
-; BTVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movd %xmm2, %eax # sched: [4:1.00]
-; BTVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovd %edi, %xmm1 # sched: [8:0.50]
-; BTVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
-; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vmovd %xmm0, %eax # sched: [4:1.00]
-; BTVER2-NEXT: vmovd %xmm1, (%rsi) # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT: movd %edi, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50]
-; ZNVER1-NEXT: vmovd %edi, %xmm1 # sched: [3:1.00]
-; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-NEXT: vmovd %xmm1, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = insertelement <4 x i32> undef, i32 %a1, i32 0
- %2 = load i32, i32 *%a2
- %3 = insertelement <4 x i32> undef, i32 %2, i32 0
- %4 = add <4 x i32> %a0, %1
- %5 = add <4 x i32> %a0, %3
- %6 = extractelement <4 x i32> %4, i32 0
- %7 = extractelement <4 x i32> %5, i32 0
- store i32 %6, i32* %a2
- ret i32 %7
-}
-
-define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) {
-; GENERIC-LABEL: test_movd_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
-; GENERIC-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
-; GENERIC-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
-; GENERIC-NEXT: movq %xmm2, %rax # sched: [2:1.00]
-; GENERIC-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movd_64:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00]
-; ATOM-NEXT: movq %rdi, %xmm2 # sched: [1:1.00]
-; ATOM-NEXT: paddq %xmm0, %xmm1 # sched: [2:1.00]
-; ATOM-NEXT: paddq %xmm0, %xmm2 # sched: [2:1.00]
-; ATOM-NEXT: movq %xmm1, %rax # sched: [3:3.00]
-; ATOM-NEXT: movq %xmm2, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movd_64:
-; SLM: # %bb.0:
-; SLM-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [3:1.00]
-; SLM-NEXT: movq %rdi, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
-; SLM-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
-; SLM-NEXT: movq %xmm2, %rax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movd_64:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
-; SANDY-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
-; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
-; SANDY-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00]
-; SANDY-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movd_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
-; SANDY-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
-; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vmovq %xmm0, %rax # sched: [2:1.00]
-; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movd_64:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; HASWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: movq %xmm2, %rax # sched: [1:1.00]
-; HASWELL-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movd_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
-; HASWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vmovq %xmm0, %rax # sched: [1:1.00]
-; HASWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movd_64:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: movq %xmm2, %rax # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movd_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
-; BROADWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vmovq %xmm0, %rax # sched: [1:1.00]
-; BROADWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movd_64:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movd_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
-; SKYLAKE-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vmovq %xmm0, %rax # sched: [2:1.00]
-; SKYLAKE-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movd_64:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
-; SKX-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; SKX-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33]
-; SKX-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00]
-; SKX-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movd_64:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
-; SKX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
-; SKX-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00]
-; SKX-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movd_64:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; BDVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [10:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: movq %xmm2, %rax # sched: [10:1.00]
-; BDVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movd_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovq %rdi, %xmm1 # sched: [10:0.50]
-; BDVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
-; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vmovq %xmm0, %rax # sched: [10:1.00]
-; BDVER2-NEXT: vmovq %xmm1, (%rsi) # sched: [2:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movd_64:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
-; BTVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [8:0.50]
-; BTVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movq %xmm2, %rax # sched: [4:1.00]
-; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movd_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovq %rdi, %xmm1 # sched: [8:0.50]
-; BTVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
-; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vmovq %xmm0, %rax # sched: [4:1.00]
-; BTVER2-NEXT: vmovq %xmm1, (%rsi) # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movd_64:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT: movq %rdi, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movd_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50]
-; ZNVER1-NEXT: vmovq %rdi, %xmm1 # sched: [3:1.00]
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-NEXT: vmovq %xmm1, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vmovq %xmm0, %rax # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = insertelement <2 x i64> undef, i64 %a1, i64 0
- %2 = load i64, i64 *%a2
- %3 = insertelement <2 x i64> undef, i64 %2, i64 0
- %4 = add <2 x i64> %a0, %1
- %5 = add <2 x i64> %a0, %3
- %6 = extractelement <2 x i64> %4, i64 0
- %7 = extractelement <2 x i64> %5, i64 0
- store i64 %6, i64* %a2
- ret i64 %7
-}
-
-define <2 x double> @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
-; GENERIC-LABEL: test_movhpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movhpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
-; ATOM-NEXT: addpd %xmm1, %xmm2 # sched: [6:3.00]
-; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: movhpd %xmm2, (%rdi) # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movhpd:
-; SLM: # %bb.0:
-; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
-; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movhpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movhpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movhpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movhpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movhpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movhpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movhpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movhpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movhpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movhpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movhpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
-; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movhpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
-; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movhpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movhpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movhpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movhpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast x86_mmx* %a2 to double*
- %2 = load double, double *%1, align 8
- %3 = insertelement <2 x double> %a1, double %2, i32 1
- %4 = fadd <2 x double> %a0, %3
- %5 = extractelement <2 x double> %4, i32 1
- store double %5, double* %1
- ret <2 x double> %3
-}
-
-define <2 x double> @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
-; GENERIC-LABEL: test_movlpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movlpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
-; ATOM-NEXT: addpd %xmm1, %xmm2 # sched: [6:3.00]
-; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: movlpd %xmm2, (%rdi) # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movlpd:
-; SLM: # %bb.0:
-; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
-; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
-; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movlpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movlpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movlpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movlpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movlpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movlpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movlpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movlpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movlpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movlpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movlpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:0.50]
-; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movlpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:0.50]
-; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movlpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movlpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movlpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movlpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast x86_mmx* %a2 to double*
- %2 = load double, double *%1, align 8
- %3 = insertelement <2 x double> %a1, double %2, i32 0
- %4 = fadd <2 x double> %a0, %3
- %5 = extractelement <2 x double> %4, i32 0
- store double %5, double* %1
- ret <2 x double> %3
-}
-
-define i32 @test_movmskpd(<2 x double> %a0) {
-; GENERIC-LABEL: test_movmskpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movmskpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movmskpd %xmm0, %eax # sched: [3:3.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movmskpd:
-; SLM: # %bb.0:
-; SLM-NEXT: movmskpd %xmm0, %eax # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movmskpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movmskpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movmskpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movmskpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movmskpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movmskpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movmskpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movmskpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movmskpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movmskpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movmskpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [10:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movmskpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movmskpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movmskpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movmskpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movmskpd %xmm0, %eax # sched: [1:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movmskpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovmskpd %xmm0, %eax # sched: [1:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
- ret i32 %1
-}
-declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
-
-define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_movntdqa:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movntdqa:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddq %xmm0, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movntdqa:
-; SLM: # %bb.0:
-; SLM-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movntdqa:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movntdqa:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movntdqa:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movntdqa:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movntdqa:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movntdqa:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movntdqa:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movntdqa:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movntdqa:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movntdqa:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movntdqa:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movntdqa:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movntdqa:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movntdqa:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movntdqa:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movntdqa:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = add <2 x i64> %a0, %a0
- store <2 x i64> %1, <2 x i64> *%a1, align 16, !nontemporal !0
- ret void
-}
-
-define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_movntpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movntpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: addpd %xmm0, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movntpd:
-; SLM: # %bb.0:
-; SLM-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movntpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movntpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movntpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movntpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movntpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movntpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movntpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movntpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movntpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movntpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movntpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [3:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movntpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [3:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movntpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movntpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movntpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movntpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fadd <2 x double> %a0, %a0
- store <2 x double> %1, <2 x double> *%a1, align 16, !nontemporal !0
- ret void
-}
-
-define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) {
-; GENERIC-LABEL: test_movq_mem:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
-; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movq_mem:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00]
-; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movq_mem:
-; SLM: # %bb.0:
-; SLM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [3:1.00]
-; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movq_mem:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
-; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movq_mem:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
-; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movq_mem:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movq_mem:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movq_mem:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movq_mem:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movq_mem:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movq_mem:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movq_mem:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movq_mem:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movq_mem:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movq_mem:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [2:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movq_mem:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
-; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movq_mem:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
-; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movq_mem:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movq_mem:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vmovq %xmm0, (%rdi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load i64, i64* %a1, align 1
- %2 = insertelement <2 x i64> zeroinitializer, i64 %1, i32 0
- %3 = add <2 x i64> %a0, %2
- %4 = extractelement <2 x i64> %3, i32 0
- store i64 %4, i64 *%a1, align 1
- ret <2 x i64> %3
-}
-
-define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) {
-; GENERIC-LABEL: test_movq_reg:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movq_reg:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
-; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movq_reg:
-; SLM: # %bb.0:
-; SLM-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
-; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movq_reg:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movq_reg:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; SANDY-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movq_reg:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movq_reg:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; HASWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movq_reg:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movq_reg:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; BROADWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movq_reg:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movq_reg:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; SKYLAKE-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movq_reg:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movq_reg:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
-; SKX-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movq_reg:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [2:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movq_reg:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [2:0.50]
-; BDVER2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movq_reg:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
-; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movq_reg:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
-; BTVER2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movq_reg:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25]
-; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movq_reg:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25]
-; ZNVER1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
- %2 = add <2 x i64> %a1, %1
- ret <2 x i64> %2
-}
-
-define void @test_movsd_mem(double* %a0, double* %a1) {
-; GENERIC-LABEL: test_movsd_mem:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
-; GENERIC-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movsd_mem:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [1:1.00]
-; ATOM-NEXT: addsd %xmm0, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movsd_mem:
-; SLM: # %bb.0:
-; SLM-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [3:1.00]
-; SLM-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movsd_mem:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
-; SANDY-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movsd_mem:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
-; SANDY-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movsd_mem:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; HASWELL-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movsd_mem:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; HASWELL-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movsd_mem:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; BROADWELL-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movsd_mem:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; BROADWELL-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movsd_mem:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; SKYLAKE-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movsd_mem:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; SKYLAKE-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movsd_mem:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; SKX-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movsd_mem:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movsd_mem:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; BDVER2-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [2:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movsd_mem:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
-; BDVER2-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [2:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movsd_mem:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
-; BTVER2-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [2:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movsd_mem:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
-; BTVER2-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [2:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movsd_mem:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movsd_mem:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
-; ZNVER1-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load double, double* %a0, align 1
- %2 = fadd double %1, %1
- store double %2, double *%a1, align 1
- ret void
-}
-
-define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) {
-; GENERIC-LABEL: test_movsd_reg:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movsd_reg:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movsd_reg:
-; SLM: # %bb.0:
-; SLM-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movsd_reg:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movsd_reg:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movsd_reg:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movsd_reg:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movsd_reg:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movsd_reg:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movsd_reg:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movsd_reg:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movsd_reg:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
-; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movsd_reg:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movsd_reg:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [2:0.50]
-; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movsd_reg:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movsd_reg:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50]
-; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movsd_reg:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movsd_reg:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movsd_reg:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 2, i32 0>
- ret <2 x double> %1
-}
-
-define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_movupd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movupd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movupd (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT: addpd %xmm0, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: movupd %xmm0, (%rsi) # sched: [2:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movupd:
-; SLM: # %bb.0:
-; SLM-NEXT: movupd (%rdi), %xmm0 # sched: [3:1.00]
-; SLM-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movupd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movupd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movupd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movupd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movupd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movupd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movupd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movupd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movupd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movupd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movupd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movupd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movupd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movupd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movupd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movupd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovupd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <2 x double>, <2 x double> *%a0, align 1
- %2 = fadd <2 x double> %1, %1
- store <2 x double> %2, <2 x double> *%a1, align 1
- ret void
-}
-
-define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_mulpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: mulpd (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_mulpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: mulpd %xmm1, %xmm0 # sched: [9:4.50]
-; ATOM-NEXT: mulpd (%rdi), %xmm0 # sched: [10:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_mulpd:
-; SLM: # %bb.0:
-; SLM-NEXT: mulpd %xmm1, %xmm0 # sched: [5:2.00]
-; SLM-NEXT: mulpd (%rdi), %xmm0 # sched: [8:2.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_mulpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_mulpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_mulpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_mulpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_mulpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [3:0.50]
-; BROADWELL-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [8:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mulpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BROADWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_mulpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mulpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_mulpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_mulpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_mulpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_mulpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_mulpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:2.00]
-; BTVER2-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [9:2.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_mulpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
-; BTVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_mulpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [3:0.50]
-; ZNVER1-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_mulpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; ZNVER1-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fmul <2 x double> %a0, %a1
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = fmul <2 x double> %1, %2
- ret <2 x double> %3
-}
-
-define double @test_mulsd(double %a0, double %a1, double *%a2) {
-; GENERIC-LABEL: test_mulsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: mulsd (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_mulsd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: mulsd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: mulsd (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_mulsd:
-; SLM: # %bb.0:
-; SLM-NEXT: mulsd %xmm1, %xmm0 # sched: [5:2.00]
-; SLM-NEXT: mulsd (%rdi), %xmm0 # sched: [8:2.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_mulsd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_mulsd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_mulsd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:0.50]
-; HASWELL-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_mulsd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; HASWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_mulsd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [3:0.50]
-; BROADWELL-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [8:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mulsd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BROADWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_mulsd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mulsd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_mulsd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_mulsd:
-; SKX: # %bb.0:
-; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_mulsd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_mulsd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_mulsd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:2.00]
-; BTVER2-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:2.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_mulsd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
-; BTVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_mulsd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [3:0.50]
-; ZNVER1-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_mulsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; ZNVER1-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fmul double %a0, %a1
- %2 = load double, double *%a2, align 8
- %3 = fmul double %1, %2
- ret double %3
-}
-
-define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_orpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_orpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: orpd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_orpd:
-; SLM: # %bb.0:
-; SLM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: orpd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_orpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_orpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_orpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_orpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_orpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_orpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_orpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_orpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_orpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_orpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_orpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_orpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_orpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_orpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_orpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_orpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast <2 x double> %a0 to <4 x i32>
- %2 = bitcast <2 x double> %a1 to <4 x i32>
- %3 = or <4 x i32> %1, %2
- %4 = load <2 x double>, <2 x double> *%a2, align 16
- %5 = bitcast <2 x double> %4 to <4 x i32>
- %6 = or <4 x i32> %3, %5
- %7 = bitcast <4 x i32> %6 to <2 x double>
- %8 = fadd <2 x double> %a1, %7
- ret <2 x double> %8
-}
-
-define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_packssdw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_packssdw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: packssdw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_packssdw:
-; SLM: # %bb.0:
-; SLM-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: packssdw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_packssdw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_packssdw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_packssdw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_packssdw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_packssdw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_packssdw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_packssdw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_packssdw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_packssdw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
-; SKX-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_packssdw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_packssdw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_packssdw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_packssdw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_packssdw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_packssdw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_packssdw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
- %2 = bitcast <8 x i16> %1 to <4 x i32>
- %3 = load <4 x i32>, <4 x i32> *%a2, align 16
- %4 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %2, <4 x i32> %3)
- ret <8 x i16> %4
-}
-declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_packsswb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_packsswb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: packsswb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_packsswb:
-; SLM: # %bb.0:
-; SLM-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: packsswb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_packsswb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_packsswb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_packsswb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_packsswb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_packsswb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_packsswb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_packsswb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_packsswb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_packsswb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
-; SKX-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_packsswb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_packsswb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_packsswb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_packsswb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_packsswb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_packsswb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_packsswb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
- %2 = bitcast <16 x i8> %1 to <8 x i16>
- %3 = load <8 x i16>, <8 x i16> *%a2, align 16
- %4 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %2, <8 x i16> %3)
- ret <16 x i8> %4
-}
-declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_packuswb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_packuswb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: packuswb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_packuswb:
-; SLM: # %bb.0:
-; SLM-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: packuswb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_packuswb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_packuswb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_packuswb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_packuswb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_packuswb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_packuswb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_packuswb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_packuswb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_packuswb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
-; SKX-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_packuswb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_packuswb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_packuswb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_packuswb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_packuswb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_packuswb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_packuswb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
- %2 = bitcast <16 x i8> %1 to <8 x i16>
- %3 = load <8 x i16>, <8 x i16> *%a2, align 16
- %4 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %2, <8 x i16> %3)
- ret <16 x i8> %4
-}
-declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_paddb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: paddb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddb:
-; SLM: # %bb.0:
-; SLM-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: paddb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_paddb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_paddb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_paddb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_paddb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_paddb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_paddb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_paddb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_paddb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_paddb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_paddb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_paddb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_paddb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_paddb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = add <16 x i8> %a0, %a1
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = add <16 x i8> %1, %2
- ret <16 x i8> %3
-}
-
-define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_paddd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: paddd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddd:
-; SLM: # %bb.0:
-; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: paddd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_paddd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_paddd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_paddd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_paddd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_paddd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_paddd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_paddd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_paddd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_paddd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_paddd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_paddd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_paddd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_paddd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = add <4 x i32> %a0, %a1
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = add <4 x i32> %1, %2
- ret <4 x i32> %3
-}
-
-define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_paddq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: paddq (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddq:
-; SLM: # %bb.0:
-; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: paddq (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_paddq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_paddq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_paddq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_paddq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_paddq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_paddq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_paddq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_paddq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_paddq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_paddq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_paddq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_paddq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_paddq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = add <2 x i64> %a0, %a1
- %2 = load <2 x i64>, <2 x i64> *%a2, align 16
- %3 = add <2 x i64> %1, %2
- ret <2 x i64> %3
-}
-
-define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_paddsb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddsb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: paddsb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddsb:
-; SLM: # %bb.0:
-; SLM-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: paddsb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_paddsb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_paddsb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_paddsb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_paddsb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_paddsb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddsb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_paddsb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddsb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_paddsb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddsb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_paddsb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_paddsb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_paddsb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_paddsb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_paddsb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_paddsb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1)
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %1, <16 x i8> %2)
- ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_paddsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: paddsw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddsw:
-; SLM: # %bb.0:
-; SLM-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: paddsw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_paddsw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_paddsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_paddsw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_paddsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_paddsw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_paddsw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_paddsw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_paddsw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_paddsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_paddsw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_paddsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_paddsw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_paddsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %1, <8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_paddusb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddusb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: paddusb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddusb:
-; SLM: # %bb.0:
-; SLM-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: paddusb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_paddusb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_paddusb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_paddusb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_paddusb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_paddusb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddusb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_paddusb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddusb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_paddusb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddusb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_paddusb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_paddusb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_paddusb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_paddusb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_paddusb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_paddusb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1)
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %1, <16 x i8> %2)
- ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_paddusw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddusw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: paddusw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddusw:
-; SLM: # %bb.0:
-; SLM-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: paddusw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_paddusw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_paddusw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_paddusw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_paddusw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_paddusw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddusw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_paddusw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddusw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_paddusw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddusw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_paddusw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_paddusw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_paddusw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_paddusw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_paddusw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_paddusw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %1, <8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_paddw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_paddw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: paddw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_paddw:
-; SLM: # %bb.0:
-; SLM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: paddw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_paddw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_paddw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_paddw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_paddw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_paddw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_paddw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_paddw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_paddw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_paddw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_paddw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_paddw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_paddw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_paddw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_paddw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_paddw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_paddw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = add <8 x i16> %a0, %a1
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = add <8 x i16> %1, %2
- ret <8 x i16> %3
-}
-
-define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_pand:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pand:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: pand (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pand:
-; SLM: # %bb.0:
-; SLM-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pand (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pand:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pand:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pand:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pand:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pand:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pand:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pand:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pand:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pand:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pand:
-; SKX: # %bb.0:
-; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pand:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pand:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pand:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pand:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pand:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pand (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pand:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = and <2 x i64> %a0, %a1
- %2 = load <2 x i64>, <2 x i64> *%a2, align 16
- %3 = and <2 x i64> %1, %2
- %4 = add <2 x i64> %3, %a1
- ret <2 x i64> %4
-}
-
-define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_pandn:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
-; GENERIC-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50]
-; GENERIC-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pandn:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: pandn (%rdi), %xmm1 # sched: [1:1.00]
-; ATOM-NEXT: paddq %xmm0, %xmm1 # sched: [2:1.00]
-; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pandn:
-; SLM: # %bb.0:
-; SLM-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: pandn (%rdi), %xmm1 # sched: [4:1.00]
-; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pandn:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
-; SANDY-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50]
-; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pandn:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
-; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pandn:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pandn:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pandn:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pandn:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [6:0.50]
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pandn:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pandn:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pandn:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
-; SKX-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50]
-; SKX-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33]
-; SKX-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pandn:
-; SKX: # %bb.0:
-; SKX-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pandn:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pandn:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50]
-; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pandn:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pandn:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [6:1.00]
-; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pandn:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pandn:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = xor <2 x i64> %a0, <i64 -1, i64 -1>
- %2 = and <2 x i64> %a1, %1
- %3 = load <2 x i64>, <2 x i64> *%a2, align 16
- %4 = xor <2 x i64> %2, <i64 -1, i64 -1>
- %5 = and <2 x i64> %3, %4
- %6 = add <2 x i64> %2, %5
- ret <2 x i64> %6
-}
-
-define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pavgb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pavgb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: pavgb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pavgb:
-; SLM: # %bb.0:
-; SLM-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pavgb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pavgb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pavgb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pavgb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pavgb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pavgb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pavgb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pavgb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pavgb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pavgb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pavgb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pavgb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pavgb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pavgb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pavgb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pavgb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pavgb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = zext <16 x i8> %a0 to <16 x i16>
- %2 = zext <16 x i8> %a1 to <16 x i16>
- %3 = add <16 x i16> %1, %2
- %4 = add <16 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %5 = lshr <16 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %6 = trunc <16 x i16> %5 to <16 x i8>
- %7 = load <16 x i8>, <16 x i8> *%a2, align 16
- %8 = zext <16 x i8> %6 to <16 x i16>
- %9 = zext <16 x i8> %7 to <16 x i16>
- %10 = add <16 x i16> %8, %9
- %11 = add <16 x i16> %10, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %12 = lshr <16 x i16> %11, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
- %13 = trunc <16 x i16> %12 to <16 x i8>
- ret <16 x i8> %13
-}
-
-define <8 x i16> @test_pavgw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pavgw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pavgw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: pavgw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pavgw:
-; SLM: # %bb.0:
-; SLM-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pavgw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pavgw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pavgw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pavgw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pavgw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pavgw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pavgw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pavgw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pavgw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pavgw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pavgw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pavgw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pavgw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pavgw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pavgw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pavgw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pavgw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = zext <8 x i16> %a0 to <8 x i32>
- %2 = zext <8 x i16> %a1 to <8 x i32>
- %3 = add <8 x i32> %1, %2
- %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %5 = lshr <8 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %6 = trunc <8 x i32> %5 to <8 x i16>
- %7 = load <8 x i16>, <8 x i16> *%a2, align 16
- %8 = zext <8 x i16> %6 to <8 x i32>
- %9 = zext <8 x i16> %7 to <8 x i32>
- %10 = add <8 x i32> %8, %9
- %11 = add <8 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %12 = lshr <8 x i32> %11, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
- %13 = trunc <8 x i32> %12 to <8 x i16>
- ret <8 x i16> %13
-}
-
-define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pcmpeqb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpeqb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpeqb:
-; SLM: # %bb.0:
-; SLM-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpeqb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpeqb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpeqb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpeqb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpeqb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpeqb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpeqb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpeqb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpeqb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpeqb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpeqb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpeqb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpeqb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpeqb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpeqb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpeqb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = icmp eq <16 x i8> %a0, %a1
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = icmp eq <16 x i8> %a0, %2
- %4 = or <16 x i1> %1, %3
- %5 = sext <16 x i1> %4 to <16 x i8>
- ret <16 x i8> %5
-}
-
-define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_pcmpeqd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpeqd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpeqd:
-; SLM: # %bb.0:
-; SLM-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpeqd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpeqd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpeqd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpeqd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpeqd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpeqd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpeqd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpeqd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpeqd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpeqd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpeqd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpeqd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpeqd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpeqd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpeqd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpeqd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = icmp eq <4 x i32> %a0, %a1
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = icmp eq <4 x i32> %a0, %2
- %4 = or <4 x i1> %1, %3
- %5 = sext <4 x i1> %4 to <4 x i32>
- ret <4 x i32> %5
-}
-
-define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pcmpeqw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpeqw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpeqw:
-; SLM: # %bb.0:
-; SLM-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpeqw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpeqw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpeqw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpeqw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpeqw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpeqw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpeqw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpeqw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpeqw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpeqw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpeqw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpeqw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpeqw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpeqw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpeqw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpeqw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = icmp eq <8 x i16> %a0, %a1
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = icmp eq <8 x i16> %a0, %2
- %4 = or <8 x i1> %1, %3
- %5 = sext <8 x i1> %4 to <8 x i16>
- ret <8 x i16> %5
-}
-
-define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pcmpgtb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; GENERIC-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpgtb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpgtb:
-; SLM: # %bb.0:
-; SLM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; SLM-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpgtb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SANDY-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpgtb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpgtb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpgtb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpgtb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpgtb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpgtb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpgtb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpgtb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; SKX-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpgtb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpgtb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpgtb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpgtb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpgtb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpgtb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpgtb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = icmp sgt <16 x i8> %a0, %a1
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = icmp sgt <16 x i8> %a0, %2
- %4 = or <16 x i1> %1, %3
- %5 = sext <16 x i1> %4 to <16 x i8>
- ret <16 x i8> %5
-}
-
-define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_pcmpgtd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; GENERIC-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpgtd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpgtd:
-; SLM: # %bb.0:
-; SLM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; SLM-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpgtd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SANDY-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpgtd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpgtd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpgtd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpgtd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpgtd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpgtd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpgtd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpgtd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; SKX-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpgtd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpgtd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpgtd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpgtd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpgtd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpgtd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpgtd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = icmp sgt <4 x i32> %a0, %a1
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = icmp eq <4 x i32> %a0, %2
- %4 = or <4 x i1> %1, %3
- %5 = sext <4 x i1> %4 to <4 x i32>
- ret <4 x i32> %5
-}
-
-define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pcmpgtw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; GENERIC-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; GENERIC-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pcmpgtw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pcmpgtw:
-; SLM: # %bb.0:
-; SLM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; SLM-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpgtw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SANDY-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpgtw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpgtw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpgtw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpgtw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpgtw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpgtw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpgtw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpgtw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; SKX-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpgtw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpgtw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpgtw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpgtw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpgtw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpgtw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpgtw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = icmp sgt <8 x i16> %a0, %a1
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = icmp sgt <8 x i16> %a0, %2
- %4 = or <8 x i1> %1, %3
- %5 = sext <8 x i1> %4 to <8 x i16>
- ret <8 x i16> %5
-}
-
-define i16 @test_pextrw(<8 x i16> %a0) {
-; GENERIC-LABEL: test_pextrw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00]
-; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pextrw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pextrw $6, %xmm0, %eax # sched: [4:2.00]
-; ATOM-NEXT: # kill: def $ax killed $ax killed $eax
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pextrw:
-; SLM: # %bb.0:
-; SLM-NEXT: pextrw $6, %xmm0, %eax # sched: [1:1.00]
-; SLM-NEXT: # kill: def $ax killed $ax killed $eax
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pextrw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00]
-; SANDY-SSE-NEXT: # kill: def $ax killed $ax killed $eax
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pextrw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00]
-; SANDY-NEXT: # kill: def $ax killed $ax killed $eax
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pextrw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:1.00]
-; HASWELL-SSE-NEXT: # kill: def $ax killed $ax killed $eax
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pextrw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00]
-; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pextrw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: # kill: def $ax killed $ax killed $eax
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pextrw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00]
-; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pextrw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT: # kill: def $ax killed $ax killed $eax
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pextrw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pextrw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00]
-; SKX-SSE-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pextrw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00]
-; SKX-NEXT: # kill: def $ax killed $ax killed $eax
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pextrw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [13:1.00]
-; BDVER2-SSE-NEXT: # kill: def $ax killed $ax killed $eax
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pextrw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [13:1.00]
-; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pextrw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00]
-; BTVER2-SSE-NEXT: # kill: def $ax killed $ax killed $eax
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pextrw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00]
-; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pextrw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:2.00]
-; ZNVER1-SSE-NEXT: # kill: def $ax killed $ax killed $eax
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pextrw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:2.00]
-; ZNVER1-NEXT: # kill: def $ax killed $ax killed $eax
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = extractelement <8 x i16> %a0, i32 6
- ret i16 %1
-}
-
-define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) {
-; GENERIC-LABEL: test_pinsrw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pinsrw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pinsrw:
-; SLM: # %bb.0:
-; SLM-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pinsrw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pinsrw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pinsrw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
-; HASWELL-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pinsrw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
-; HASWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pinsrw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
-; BROADWELL-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pinsrw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
-; BROADWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pinsrw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
-; SKYLAKE-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pinsrw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
-; SKYLAKE-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pinsrw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
-; SKX-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pinsrw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
-; SKX-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pinsrw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [12:0.50]
-; BDVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pinsrw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [12:0.50]
-; BDVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pinsrw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [7:0.50]
-; BTVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pinsrw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [7:0.50]
-; BTVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pinsrw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pinsrw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = insertelement <8 x i16> %a0, i16 %a1, i32 1
- %2 = load i16, i16 *%a2
- %3 = insertelement <8 x i16> %1, i16 %2, i32 3
- ret <8 x i16> %3
-}
-
-define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pmaddwd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmaddwd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: pmaddwd (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmaddwd:
-; SLM: # %bb.0:
-; SLM-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmaddwd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmaddwd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmaddwd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmaddwd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmaddwd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaddwd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmaddwd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaddwd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmaddwd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaddwd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmaddwd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmaddwd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmaddwd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmaddwd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmaddwd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmaddwd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
- %2 = bitcast <4 x i32> %1 to <8 x i16>
- %3 = load <8 x i16>, <8 x i16> *%a2, align 16
- %4 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %2, <8 x i16> %3)
- ret <4 x i32> %4
-}
-declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pmaxsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmaxsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: pmaxsw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmaxsw:
-; SLM: # %bb.0:
-; SLM-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pmaxsw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmaxsw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmaxsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmaxsw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmaxsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmaxsw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaxsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmaxsw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaxsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmaxsw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaxsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmaxsw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmaxsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmaxsw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmaxsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmaxsw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmaxsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %1, <8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pmaxub:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmaxub:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: pmaxub (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmaxub:
-; SLM: # %bb.0:
-; SLM-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pmaxub (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmaxub:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmaxub:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmaxub:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmaxub:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmaxub:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaxub:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmaxub:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaxub:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmaxub:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaxub:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmaxub:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmaxub:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmaxub:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmaxub:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmaxub:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmaxub:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %1, <16 x i8> %2)
- ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <8 x i16> @test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pminsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pminsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: pminsw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pminsw:
-; SLM: # %bb.0:
-; SLM-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pminsw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pminsw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pminsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pminsw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pminsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pminsw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pminsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pminsw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pminsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pminsw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pminsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pminsw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pminsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pminsw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pminsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pminsw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pminsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %1, <8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pminub:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pminub:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: pminub (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pminub:
-; SLM: # %bb.0:
-; SLM-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pminub (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pminub:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pminub:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pminub:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pminub:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pminub:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pminub:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pminub:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pminub:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pminub:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pminub:
-; SKX: # %bb.0:
-; SKX-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pminub:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pminub:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pminub:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pminub:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pminub:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pminub:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %1, <16 x i8> %2)
- ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone
-
-define i32 @test_pmovmskb(<16 x i8> %a0) {
-; GENERIC-LABEL: test_pmovmskb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmovmskb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmovmskb %xmm0, %eax # sched: [3:3.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmovmskb:
-; SLM: # %bb.0:
-; SLM-NEXT: pmovmskb %xmm0, %eax # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmovmskb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmovmskb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmovmskb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmovmskb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmovmskb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovmskb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmovmskb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovmskb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmovmskb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovmskb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmovmskb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [13:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmovmskb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [13:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmovmskb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmovmskb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmovmskb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [1:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmovmskb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovmskb %xmm0, %eax # sched: [1:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
- ret i32 %1
-}
-declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
-
-define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pmulhuw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmulhuw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: pmulhuw (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmulhuw:
-; SLM: # %bb.0:
-; SLM-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmulhuw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmulhuw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmulhuw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmulhuw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmulhuw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmulhuw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmulhuw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmulhuw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmulhuw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmulhuw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmulhuw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmulhuw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmulhuw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmulhuw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmulhuw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmulhuw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %1, <8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pmulhw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmulhw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: pmulhw (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmulhw:
-; SLM: # %bb.0:
-; SLM-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmulhw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmulhw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmulhw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmulhw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmulhw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmulhw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmulhw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmulhw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmulhw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmulhw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmulhw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmulhw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmulhw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmulhw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmulhw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmulhw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %1, <8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pmullw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmullw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmullw %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: pmullw (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmullw:
-; SLM: # %bb.0:
-; SLM-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmullw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmullw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmullw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmullw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmullw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmullw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmullw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmullw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmullw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmullw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmullw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmullw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmullw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmullw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmullw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmullw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = mul <8 x i16> %a0, %a1
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = mul <8 x i16> %1, %2
- ret <8 x i16> %3
-}
-
-define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_pmuludq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmuludq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: pmuludq (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmuludq:
-; SLM: # %bb.0:
-; SLM-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT: pmuludq (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmuludq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmuludq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmuludq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmuludq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmuludq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmuludq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmuludq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmuludq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmuludq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmuludq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmuludq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmuludq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmuludq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmuludq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmuludq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmuludq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1)
- %2 = bitcast <2 x i64> %1 to <4 x i32>
- %3 = load <4 x i32>, <4 x i32> *%a2, align 16
- %4 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %2, <4 x i32> %3)
- ret <2 x i64> %4
-}
-declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_por:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: por (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_por:
-; ATOM: # %bb.0:
-; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: por (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_por:
-; SLM: # %bb.0:
-; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: por (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_por:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_por:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_por:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_por:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_por:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: por (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_por:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_por:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_por:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_por:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_por:
-; SKX: # %bb.0:
-; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_por:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_por:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_por:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: por (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_por:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_por:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: por (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_por:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = or <2 x i64> %a0, %a1
- %2 = load <2 x i64>, <2 x i64> *%a2, align 16
- %3 = or <2 x i64> %1, %2
- %4 = add <2 x i64> %3, %a1
- ret <2 x i64> %4
-}
-
-define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_psadbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psadbw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psadbw %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: psadbw (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psadbw:
-; SLM: # %bb.0:
-; SLM-NEXT: psadbw %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT: psadbw (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psadbw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psadbw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psadbw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psadbw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psadbw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psadbw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psadbw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psadbw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psadbw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00]
-; SKX-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psadbw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SKX-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psadbw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [4:0.50]
-; BDVER2-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psadbw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; BDVER2-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psadbw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psadbw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psadbw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psadbw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
- %2 = bitcast <2 x i64> %1 to <16 x i8>
- %3 = load <16 x i8>, <16 x i8> *%a2, align 16
- %4 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %2, <16 x i8> %3)
- ret <2 x i64> %4
-}
-declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) {
-; GENERIC-LABEL: test_pshufd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
-; GENERIC-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50]
-; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pshufd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
-; ATOM-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [1:1.00]
-; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pshufd:
-; SLM: # %bb.0:
-; SLM-NEXT: pshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [4:1.00]
-; SLM-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
-; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pshufd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
-; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50]
-; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pshufd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50]
-; SANDY-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50]
-; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pshufd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
-; HASWELL-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
-; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pshufd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
-; HASWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
-; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pshufd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pshufd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
-; BROADWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
-; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pshufd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pshufd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
-; SKYLAKE-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
-; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pshufd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
-; SKX-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
-; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pshufd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
-; SKX-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00]
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pshufd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [2:0.50]
-; BDVER2-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pshufd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50]
-; BDVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [2:0.50]
-; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pshufd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
-; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pshufd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
-; BTVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50]
-; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pshufd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pshufd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [8:0.50]
-; ZNVER1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.25]
-; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
- %2 = load <4 x i32>, <4 x i32> *%a1, align 16
- %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
- %4 = add <4 x i32> %1, %3
- ret <4 x i32> %4
-}
-
-define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) {
-; GENERIC-LABEL: test_pshufhw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
-; GENERIC-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
-; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pshufhw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; ATOM-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00]
-; ATOM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pshufhw:
-; SLM: # %bb.0:
-; SLM-NEXT: pshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [4:1.00]
-; SLM-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pshufhw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
-; SANDY-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
-; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pshufhw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
-; SANDY-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
-; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pshufhw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; HASWELL-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
-; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pshufhw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; HASWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
-; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pshufhw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pshufhw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
-; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pshufhw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pshufhw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; SKYLAKE-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
-; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pshufhw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; SKX-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
-; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pshufhw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
-; SKX-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
-; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pshufhw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [2:0.50]
-; BDVER2-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pshufhw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
-; BDVER2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [2:0.50]
-; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pshufhw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
-; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pshufhw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
-; BTVER2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
-; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pshufhw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pshufhw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50]
-; ZNVER1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25]
-; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6>
- %2 = load <8 x i16>, <8 x i16> *%a1, align 16
- %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4>
- %4 = add <8 x i16> %1, %3
- ret <8 x i16> %4
-}
-
-define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) {
-; GENERIC-LABEL: test_pshuflw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
-; GENERIC-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
-; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pshuflw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; ATOM-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00]
-; ATOM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pshuflw:
-; SLM: # %bb.0:
-; SLM-NEXT: pshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [4:1.00]
-; SLM-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pshuflw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
-; SANDY-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
-; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pshuflw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
-; SANDY-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
-; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pshuflw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; HASWELL-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
-; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pshuflw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; HASWELL-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
-; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pshuflw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pshuflw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
-; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pshuflw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pshuflw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
-; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pshuflw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; SKX-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
-; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pshuflw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
-; SKX-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
-; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pshuflw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [2:0.50]
-; BDVER2-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pshuflw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
-; BDVER2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [2:0.50]
-; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pshuflw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
-; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pshuflw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
-; BTVER2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
-; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pshuflw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pshuflw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50]
-; ZNVER1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25]
-; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
- %2 = load <8 x i16>, <8 x i16> *%a1, align 16
- %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
- %4 = add <8 x i16> %1, %3
- ret <8 x i16> %4
-}
-
-define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_pslld:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: pslld $2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pslld:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: pslld (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT: pslld $2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pslld:
-; SLM: # %bb.0:
-; SLM-NEXT: pslld %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: pslld (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: pslld $2, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pslld:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pslld:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pslld:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pslld:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pslld:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pslld:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pslld:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pslld:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pslld:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pslld:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pslld:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pslld:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pslld:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pslld:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pslld:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pslld:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %1, <4 x i32> %2)
- %4 = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %3, i32 2)
- ret <4 x i32> %4
-}
-declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone
-declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone
-
-define <4 x i32> @test_pslldq(<4 x i32> %a0) {
-; GENERIC-LABEL: test_pslldq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pslldq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pslldq:
-; SLM: # %bb.0:
-; SLM-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pslldq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pslldq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pslldq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pslldq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pslldq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pslldq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pslldq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pslldq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pslldq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pslldq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pslldq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pslldq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pslldq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pslldq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pslldq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pslldq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
- ret <4 x i32> %1
-}
-
-define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_psllq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: psllq $2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psllq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: psllq (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT: psllq $2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psllq:
-; SLM: # %bb.0:
-; SLM-NEXT: psllq %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: psllq (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: psllq $2, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psllq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psllq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psllq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psllq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psllq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psllq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psllq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psllq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psllq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psllq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psllq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psllq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psllq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psllq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psllq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psllq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
- %2 = load <2 x i64>, <2 x i64> *%a2, align 16
- %3 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %1, <2 x i64> %2)
- %4 = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %3, i32 2)
- ret <2 x i64> %4
-}
-declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone
-declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone
-
-define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_psllw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: psllw $2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psllw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: psllw (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT: psllw $2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psllw:
-; SLM: # %bb.0:
-; SLM-NEXT: psllw %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: psllw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: psllw $2, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psllw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psllw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psllw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psllw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psllw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psllw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psllw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psllw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psllw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psllw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psllw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psllw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psllw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psllw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psllw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psllw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %1, <8 x i16> %2)
- %4 = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %3, i32 2)
- ret <8 x i16> %4
-}
-declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone
-declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone
-
-define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_psrad:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: psrad $2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psrad:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: psrad (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT: psrad $2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psrad:
-; SLM: # %bb.0:
-; SLM-NEXT: psrad %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: psrad (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: psrad $2, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psrad:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psrad:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psrad:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psrad:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psrad:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrad:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psrad:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrad:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psrad:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrad:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psrad:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psrad:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psrad:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psrad:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psrad:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psrad:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> %2)
- %4 = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
- ret <4 x i32> %4
-}
-declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone
-declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone
-
-define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_psraw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: psraw $2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psraw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: psraw (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT: psraw $2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psraw:
-; SLM: # %bb.0:
-; SLM-NEXT: psraw %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: psraw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: psraw $2, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psraw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psraw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psraw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psraw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psraw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psraw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psraw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psraw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psraw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psraw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psraw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER2-SSE-NEXT: psraw $2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psraw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psraw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: psraw $2, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psraw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psraw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psraw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> %2)
- %4 = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
- ret <8 x i16> %4
-}
-declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone
-declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone
-
-define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_psrld:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: psrld $2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psrld:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: psrld (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT: psrld $2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psrld:
-; SLM: # %bb.0:
-; SLM-NEXT: psrld %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: psrld (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: psrld $2, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psrld:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psrld:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psrld:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psrld:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psrld:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrld:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psrld:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrld:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psrld:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrld:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psrld:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psrld:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psrld:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psrld:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psrld:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psrld:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %1, <4 x i32> %2)
- %4 = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %3, i32 2)
- ret <4 x i32> %4
-}
-declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone
-declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone
-
-define <4 x i32> @test_psrldq(<4 x i32> %a0) {
-; GENERIC-LABEL: test_psrldq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psrldq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psrldq:
-; SLM: # %bb.0:
-; SLM-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psrldq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psrldq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psrldq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psrldq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psrldq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrldq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psrldq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrldq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psrldq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrldq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psrldq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psrldq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psrldq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psrldq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psrldq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psrldq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
- ret <4 x i32> %1
-}
-
-define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_psrlq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: psrlq $2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psrlq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: psrlq (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT: psrlq $2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psrlq:
-; SLM: # %bb.0:
-; SLM-NEXT: psrlq %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: psrlq (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: psrlq $2, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psrlq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psrlq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psrlq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psrlq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psrlq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrlq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psrlq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrlq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psrlq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrlq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psrlq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psrlq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psrlq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psrlq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psrlq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psrlq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
- %2 = load <2 x i64>, <2 x i64> *%a2, align 16
- %3 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %1, <2 x i64> %2)
- %4 = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %3, i32 2)
- ret <2 x i64> %4
-}
-declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone
-declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone
-
-define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_psrlw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: psrlw $2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psrlw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: psrlw (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT: psrlw $2, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psrlw:
-; SLM: # %bb.0:
-; SLM-NEXT: psrlw %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: psrlw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: psrlw $2, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psrlw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psrlw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psrlw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psrlw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psrlw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psrlw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psrlw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psrlw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psrlw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psrlw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psrlw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psrlw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psrlw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psrlw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psrlw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psrlw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %1, <8 x i16> %2)
- %4 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %3, i32 2)
- ret <8 x i16> %4
-}
-declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone
-declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone
-
-define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_psubb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: psubb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubb:
-; SLM: # %bb.0:
-; SLM-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: psubb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psubb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psubb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psubb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psubb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psubb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psubb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psubb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psubb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psubb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psubb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psubb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psubb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psubb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sub <16 x i8> %a0, %a1
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = sub <16 x i8> %1, %2
- ret <16 x i8> %3
-}
-
-define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_psubd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: psubd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubd:
-; SLM: # %bb.0:
-; SLM-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: psubd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psubd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psubd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psubd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psubd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psubd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psubd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psubd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psubd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psubd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psubd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psubd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psubd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psubd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sub <4 x i32> %a0, %a1
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = sub <4 x i32> %1, %2
- ret <4 x i32> %3
-}
-
-define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_psubq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psubq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: psubq (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubq:
-; SLM: # %bb.0:
-; SLM-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: psubq (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psubq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psubq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psubq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psubq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psubq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psubq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psubq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psubq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psubq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psubq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psubq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psubq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psubq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sub <2 x i64> %a0, %a1
- %2 = load <2 x i64>, <2 x i64> *%a2, align 16
- %3 = sub <2 x i64> %1, %2
- ret <2 x i64> %3
-}
-
-define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_psubsb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubsb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: psubsb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubsb:
-; SLM: # %bb.0:
-; SLM-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: psubsb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psubsb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psubsb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psubsb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psubsb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psubsb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubsb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psubsb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubsb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psubsb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubsb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psubsb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psubsb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psubsb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psubsb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psubsb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psubsb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1)
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %1, <16 x i8> %2)
- ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <8 x i16> @test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_psubsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: psubsw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubsw:
-; SLM: # %bb.0:
-; SLM-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: psubsw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psubsw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psubsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psubsw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psubsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psubsw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psubsw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psubsw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psubsw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psubsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psubsw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psubsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psubsw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psubsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %1, <8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_psubusb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubusb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: psubusb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubusb:
-; SLM: # %bb.0:
-; SLM-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: psubusb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psubusb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psubusb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psubusb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psubusb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psubusb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubusb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psubusb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubusb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psubusb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubusb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psubusb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psubusb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psubusb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psubusb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psubusb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psubusb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1)
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %1, <16 x i8> %2)
- ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_psubusw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubusw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: psubusw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubusw:
-; SLM: # %bb.0:
-; SLM-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: psubusw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psubusw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psubusw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psubusw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psubusw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psubusw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubusw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psubusw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubusw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psubusw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubusw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psubusw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psubusw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psubusw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psubusw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psubusw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psubusw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %1, <8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_psubw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psubw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: psubw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psubw:
-; SLM: # %bb.0:
-; SLM-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: psubw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psubw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psubw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psubw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psubw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psubw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psubw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psubw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psubw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psubw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psubw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psubw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psubw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psubw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psubw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psubw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psubw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = sub <8 x i16> %a0, %a1
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = sub <8 x i16> %1, %2
- ret <8 x i16> %3
-}
-
-define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_punpckhbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
-; GENERIC-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpckhbw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; ATOM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpckhbw:
-; SLM: # %bb.0:
-; SLM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; SLM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_punpckhbw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
-; SANDY-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_punpckhbw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
-; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_punpckhbw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; HASWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_punpckhbw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_punpckhbw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckhbw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_punpckhbw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckhbw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_punpckhbw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; SKX-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckhbw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
-; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_punpckhbw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [2:0.50]
-; BDVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_punpckhbw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [2:0.50]
-; BDVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_punpckhbw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
-; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_punpckhbw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
-; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_punpckhbw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25]
-; ZNVER1-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_punpckhbw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25]
-; ZNVER1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
- ret <16 x i8> %3
-}
-
-define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_punpckhdq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; GENERIC-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
-; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpckhdq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; ATOM-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00]
-; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpckhdq:
-; SLM: # %bb.0:
-; SLM-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SLM-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [4:1.00]
-; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_punpckhdq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; SANDY-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
-; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_punpckhdq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
-; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_punpckhdq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; HASWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_punpckhdq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_punpckhdq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckhdq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
-; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_punpckhdq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckhdq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_punpckhdq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckhdq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_punpckhdq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50]
-; BDVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_punpckhdq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50]
-; BDVER2-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
-; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_punpckhdq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_punpckhdq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; BTVER2-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
-; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_punpckhdq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
-; ZNVER1-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_punpckhdq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
-; ZNVER1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
-; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
- %4 = add <4 x i32> %1, %3
- ret <4 x i32> %4
-}
-
-define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_punpckhqdq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; GENERIC-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
-; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpckhqdq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; ATOM-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00]
-; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpckhqdq:
-; SLM: # %bb.0:
-; SLM-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SLM-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [4:1.00]
-; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_punpckhqdq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; SANDY-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
-; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_punpckhqdq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
-; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_punpckhqdq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_punpckhqdq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_punpckhqdq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckhqdq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_punpckhqdq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckhqdq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_punpckhqdq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckhqdq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_punpckhqdq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50]
-; BDVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_punpckhqdq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50]
-; BDVER2-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
-; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_punpckhqdq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_punpckhqdq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; BTVER2-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
-; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_punpckhqdq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25]
-; ZNVER1-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_punpckhqdq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25]
-; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
- %2 = load <2 x i64>, <2 x i64> *%a2, align 16
- %3 = shufflevector <2 x i64> %a1, <2 x i64> %2, <2x i32> <i32 1, i32 3>
- %4 = add <2 x i64> %1, %3
- ret <2 x i64> %4
-}
-
-define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_punpckhwd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; GENERIC-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpckhwd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; ATOM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpckhwd:
-; SLM: # %bb.0:
-; SLM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SLM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_punpckhwd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; SANDY-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_punpckhwd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_punpckhwd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; HASWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_punpckhwd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_punpckhwd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckhwd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_punpckhwd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckhwd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_punpckhwd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SKX-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckhwd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_punpckhwd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50]
-; BDVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_punpckhwd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50]
-; BDVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_punpckhwd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_punpckhwd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_punpckhwd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
-; ZNVER1-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_punpckhwd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
-; ZNVER1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
- ret <8 x i16> %3
-}
-
-define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_punpcklbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; GENERIC-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpcklbw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; ATOM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpcklbw:
-; SLM: # %bb.0:
-; SLM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SLM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_punpcklbw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; SANDY-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_punpcklbw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_punpcklbw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; HASWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_punpcklbw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_punpcklbw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpcklbw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_punpcklbw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpcklbw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_punpcklbw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SKX-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpcklbw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
-; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_punpcklbw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50]
-; BDVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_punpcklbw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50]
-; BDVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_punpcklbw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_punpcklbw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
-; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_punpcklbw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
-; ZNVER1-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_punpcklbw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
-; ZNVER1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
- ret <16 x i8> %3
-}
-
-define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_punpckldq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
-; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
-; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpckldq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00]
-; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpckldq:
-; SLM: # %bb.0:
-; SLM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SLM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [4:1.00]
-; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_punpckldq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
-; SANDY-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
-; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_punpckldq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
-; SANDY-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
-; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_punpckldq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_punpckldq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_punpckldq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpckldq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
-; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_punpckldq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpckldq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKYLAKE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_punpckldq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpckldq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_punpckldq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [2:0.50]
-; BDVER2-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_punpckldq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [2:0.50]
-; BDVER2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
-; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_punpckldq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
-; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_punpckldq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
-; BTVER2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
-; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_punpckldq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25]
-; ZNVER1-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_punpckldq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25]
-; ZNVER1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
-; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
- %4 = add <4 x i32> %1, %3
- ret <4 x i32> %4
-}
-
-define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_punpcklqdq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
-; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
-; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpcklqdq:
-; ATOM: # %bb.0:
-; ATOM-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
-; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpcklqdq:
-; SLM: # %bb.0:
-; SLM-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SLM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
-; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_punpcklqdq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
-; SANDY-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
-; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_punpcklqdq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
-; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
-; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_punpcklqdq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; HASWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_punpcklqdq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_punpcklqdq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpcklqdq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_punpcklqdq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpcklqdq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_punpcklqdq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKX-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpcklqdq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_punpcklqdq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [2:0.50]
-; BDVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_punpcklqdq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [2:0.50]
-; BDVER2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
-; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_punpcklqdq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
-; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_punpcklqdq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
-; BTVER2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_punpcklqdq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25]
-; ZNVER1-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_punpcklqdq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25]
-; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
- %2 = load <2 x i64>, <2 x i64> *%a2, align 16
- %3 = shufflevector <2 x i64> %a1, <2 x i64> %2, <2x i32> <i32 0, i32 2>
- %4 = add <2 x i64> %1, %3
- ret <2 x i64> %4
-}
-
-define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_punpcklwd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; GENERIC-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_punpcklwd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; ATOM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_punpcklwd:
-; SLM: # %bb.0:
-; SLM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SLM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_punpcklwd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; SANDY-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_punpcklwd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_punpcklwd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; HASWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_punpcklwd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_punpcklwd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_punpcklwd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_punpcklwd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_punpcklwd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_punpcklwd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_punpcklwd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
-; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_punpcklwd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50]
-; BDVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_punpcklwd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50]
-; BDVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_punpcklwd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_punpcklwd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
-; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_punpcklwd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
-; ZNVER1-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_punpcklwd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
-; ZNVER1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
- ret <8 x i16> %3
-}
-
-define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_pxor:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pxor:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: pxor (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pxor:
-; SLM: # %bb.0:
-; SLM-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pxor (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pxor:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pxor:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pxor:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pxor:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pxor:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pxor:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pxor:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pxor:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pxor:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pxor:
-; SKX: # %bb.0:
-; SKX-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pxor:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pxor:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pxor:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pxor:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pxor:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pxor:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = xor <2 x i64> %a0, %a1
- %2 = load <2 x i64>, <2 x i64> *%a2, align 16
- %3 = xor <2 x i64> %1, %2
- %4 = add <2 x i64> %3, %a1
- ret <2 x i64> %4
-}
-
-define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_shufpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; GENERIC-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_shufpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; ATOM-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [1:1.00]
-; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_shufpd:
-; SLM: # %bb.0:
-; SLM-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; SLM-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [4:1.00]
-; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_shufpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; SANDY-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_shufpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; SANDY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_shufpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; HASWELL-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_shufpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; HASWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_shufpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_shufpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; BROADWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
-; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_shufpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_shufpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; SKYLAKE-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_shufpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; SKX-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_shufpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
-; SKX-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_shufpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [2:0.50]
-; BDVER2-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:0.50]
-; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_shufpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [2:0.50]
-; BDVER2-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:0.50]
-; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_shufpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
-; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_shufpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
-; BTVER2-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
-; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_shufpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_shufpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
-; ZNVER1-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2>
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = shufflevector <2 x double> %a1, <2 x double> %2, <2 x i32> <i32 1, i32 2>
- %4 = fadd <2 x double> %1, %3
- ret <2 x double> %4
-}
-
-define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_sqrtpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:21.00]
-; GENERIC-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:21.00]
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sqrtpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: sqrtpd %xmm0, %xmm1 # sched: [125:62.50]
-; ATOM-NEXT: sqrtpd (%rdi), %xmm0 # sched: [125:62.50]
-; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sqrtpd:
-; SLM: # %bb.0:
-; SLM-NEXT: sqrtpd (%rdi), %xmm1 # sched: [74:70.00]
-; SLM-NEXT: sqrtpd %xmm0, %xmm0 # sched: [71:70.00]
-; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_sqrtpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:21.00]
-; SANDY-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:21.00]
-; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_sqrtpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:21.00]
-; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:21.00]
-; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_sqrtpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [16:14.00]
-; HASWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [22:14.00]
-; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_sqrtpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [16:14.00]
-; HASWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [22:14.00]
-; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_sqrtpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [16:14.00]
-; BROADWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [21:14.00]
-; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sqrtpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [16:14.00]
-; BROADWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [21:14.00]
-; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_sqrtpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:6.00]
-; SKYLAKE-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:6.00]
-; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sqrtpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:6.00]
-; SKYLAKE-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [24:6.00]
-; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_sqrtpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:6.00]
-; SKX-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:6.00]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sqrtpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:6.00]
-; SKX-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [24:6.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_sqrtpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [9:13.50]
-; BDVER2-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [14:13.50]
-; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_sqrtpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [14:13.50]
-; BDVER2-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [9:13.50]
-; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_sqrtpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [27:27.00]
-; BTVER2-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [32:27.00]
-; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_sqrtpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [32:27.00]
-; BTVER2-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [27:27.00]
-; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_sqrtpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:20.00]
-; ZNVER1-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:20.00]
-; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_sqrtpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:20.00]
-; ZNVER1-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [20:20.00]
-; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0)
- %2 = load <2 x double>, <2 x double> *%a1, align 16
- %3 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %2)
- %4 = fadd <2 x double> %1, %3
- ret <2 x double> %4
-}
-declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
-
-; TODO - sqrtsd_m
-
-define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_sqrtsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: sqrtsd %xmm0, %xmm0 # sched: [21:21.00]
-; GENERIC-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
-; GENERIC-NEXT: sqrtsd %xmm1, %xmm1 # sched: [21:21.00]
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_sqrtsd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movapd (%rdi), %xmm1 # sched: [1:1.00]
-; ATOM-NEXT: sqrtsd %xmm0, %xmm0 # sched: [62:31.00]
-; ATOM-NEXT: sqrtsd %xmm1, %xmm1 # sched: [62:31.00]
-; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_sqrtsd:
-; SLM: # %bb.0:
-; SLM-NEXT: movapd (%rdi), %xmm1 # sched: [3:1.00]
-; SLM-NEXT: sqrtsd %xmm0, %xmm0 # sched: [35:35.00]
-; SLM-NEXT: sqrtsd %xmm1, %xmm1 # sched: [35:35.00]
-; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_sqrtsd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [21:21.00]
-; SANDY-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
-; SANDY-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [21:21.00]
-; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_sqrtsd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:21.00]
-; SANDY-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50]
-; SANDY-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:21.00]
-; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_sqrtsd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [16:14.00]
-; HASWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
-; HASWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [16:14.00]
-; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_sqrtsd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [16:14.00]
-; HASWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50]
-; HASWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [16:14.00]
-; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_sqrtsd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [16:8.00]
-; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [16:8.00]
-; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_sqrtsd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [16:8.00]
-; BROADWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:0.50]
-; BROADWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [16:8.00]
-; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_sqrtsd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:6.00]
-; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:6.00]
-; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_sqrtsd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00]
-; SKYLAKE-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50]
-; SKYLAKE-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:6.00]
-; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_sqrtsd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:6.00]
-; SKX-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
-; SKX-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:6.00]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_sqrtsd:
-; SKX: # %bb.0:
-; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00]
-; SKX-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50]
-; SKX-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:6.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_sqrtsd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:0.50]
-; BDVER2-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [9:13.50]
-; BDVER2-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [9:13.50]
-; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_sqrtsd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:0.50]
-; BDVER2-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [9:13.50]
-; BDVER2-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [9:13.50]
-; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_sqrtsd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:1.00]
-; BTVER2-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [27:27.00]
-; BTVER2-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [27:27.00]
-; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_sqrtsd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:1.00]
-; BTVER2-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [27:27.00]
-; BTVER2-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [27:27.00]
-; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_sqrtsd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:20.00]
-; ZNVER1-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:20.00]
-; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_sqrtsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovapd (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [20:20.00]
-; ZNVER1-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [20:20.00]
-; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0)
- %2 = load <2 x double>, <2 x double> *%a1, align 16
- %3 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
- %4 = fadd <2 x double> %1, %3
- ret <2 x double> %4
-}
-declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
-
-define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_subpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_subpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: subpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: subpd (%rdi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_subpd:
-; SLM: # %bb.0:
-; SLM-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: subpd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_subpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_subpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_subpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_subpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_subpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_subpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_subpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_subpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_subpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_subpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_subpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_subpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_subpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_subpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_subpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_subpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fsub <2 x double> %a0, %a1
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = fsub <2 x double> %1, %2
- ret <2 x double> %3
-}
-
-define double @test_subsd(double %a0, double %a1, double *%a2) {
-; GENERIC-LABEL: test_subsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: subsd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_subsd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: subsd %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: subsd (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_subsd:
-; SLM: # %bb.0:
-; SLM-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: subsd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_subsd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_subsd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_subsd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_subsd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_subsd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_subsd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_subsd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_subsd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_subsd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_subsd:
-; SKX: # %bb.0:
-; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_subsd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_subsd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_subsd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_subsd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_subsd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_subsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = fsub double %a0, %a1
- %2 = load double, double *%a2, align 8
- %3 = fsub double %1, %2
- ret double %3
-}
-
-define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_ucomisd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: setnp %al # sched: [1:0.50]
-; GENERIC-NEXT: sete %cl # sched: [1:0.50]
-; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33]
-; GENERIC-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: setnp %al # sched: [1:0.50]
-; GENERIC-NEXT: sete %dl # sched: [1:0.50]
-; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33]
-; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33]
-; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_ucomisd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: ucomisd %xmm1, %xmm0 # sched: [9:4.50]
-; ATOM-NEXT: setnp %al # sched: [1:0.50]
-; ATOM-NEXT: sete %cl # sched: [1:0.50]
-; ATOM-NEXT: andb %al, %cl # sched: [1:0.50]
-; ATOM-NEXT: ucomisd (%rdi), %xmm0 # sched: [10:5.00]
-; ATOM-NEXT: setnp %al # sched: [1:0.50]
-; ATOM-NEXT: sete %dl # sched: [1:0.50]
-; ATOM-NEXT: andb %al, %dl # sched: [1:0.50]
-; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50]
-; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_ucomisd:
-; SLM: # %bb.0:
-; SLM-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: setnp %al # sched: [1:0.50]
-; SLM-NEXT: sete %cl # sched: [1:0.50]
-; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
-; SLM-NEXT: ucomisd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: setnp %al # sched: [1:0.50]
-; SLM-NEXT: sete %dl # sched: [1:0.50]
-; SLM-NEXT: andb %al, %dl # sched: [1:0.50]
-; SLM-NEXT: orb %cl, %dl # sched: [1:0.50]
-; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_ucomisd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50]
-; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
-; SANDY-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50]
-; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
-; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
-; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_ucomisd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: setnp %al # sched: [1:0.50]
-; SANDY-NEXT: sete %cl # sched: [1:0.50]
-; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
-; SANDY-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-NEXT: setnp %al # sched: [1:0.50]
-; SANDY-NEXT: sete %dl # sched: [1:0.50]
-; SANDY-NEXT: andb %al, %dl # sched: [1:0.33]
-; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33]
-; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_ucomisd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
-; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
-; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; HASWELL-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
-; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
-; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_ucomisd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: setnp %al # sched: [1:0.50]
-; HASWELL-NEXT: sete %cl # sched: [1:0.50]
-; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
-; HASWELL-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: setnp %al # sched: [1:0.50]
-; HASWELL-NEXT: sete %dl # sched: [1:0.50]
-; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25]
-; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
-; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_ucomisd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ucomisd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
-; BROADWELL-NEXT: sete %cl # sched: [1:0.50]
-; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25]
-; BROADWELL-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: setnp %al # sched: [1:0.50]
-; BROADWELL-NEXT: sete %dl # sched: [1:0.50]
-; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25]
-; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25]
-; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_ucomisd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ucomisd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
-; SKYLAKE-NEXT: sete %cl # sched: [1:0.50]
-; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKYLAKE-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: setnp %al # sched: [1:0.50]
-; SKYLAKE-NEXT: sete %dl # sched: [1:0.50]
-; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25]
-; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_ucomisd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SKX-SSE-NEXT: sete %cl # sched: [1:0.50]
-; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKX-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
-; SKX-SSE-NEXT: sete %dl # sched: [1:0.50]
-; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_ucomisd:
-; SKX: # %bb.0:
-; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
-; SKX-NEXT: setnp %al # sched: [1:0.50]
-; SKX-NEXT: sete %cl # sched: [1:0.50]
-; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKX-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-NEXT: setnp %al # sched: [1:0.50]
-; SKX-NEXT: sete %dl # sched: [1:0.50]
-; SKX-NEXT: andb %al, %dl # sched: [1:0.25]
-; SKX-NEXT: orb %cl, %dl # sched: [1:0.25]
-; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_ucomisd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [6:1.00]
-; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_ucomisd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-NEXT: setnp %al # sched: [1:0.50]
-; BDVER2-NEXT: sete %cl # sched: [1:0.50]
-; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50]
-; BDVER2-NEXT: vucomisd (%rdi), %xmm0 # sched: [6:1.00]
-; BDVER2-NEXT: setnp %al # sched: [1:0.50]
-; BDVER2-NEXT: sete %dl # sched: [1:0.50]
-; BDVER2-NEXT: andb %al, %dl # sched: [1:0.50]
-; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
-; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_ucomisd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
-; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_ucomisd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: setnp %al # sched: [1:0.50]
-; BTVER2-NEXT: sete %cl # sched: [1:0.50]
-; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
-; BTVER2-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: setnp %al # sched: [1:0.50]
-; BTVER2-NEXT: sete %dl # sched: [1:0.50]
-; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50]
-; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50]
-; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_ucomisd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_ucomisd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
-; ZNVER1-NEXT: sete %cl # sched: [1:0.25]
-; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
-; ZNVER1-NEXT: vucomisd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: setnp %al # sched: [1:0.25]
-; ZNVER1-NEXT: sete %dl # sched: [1:0.25]
-; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25]
-; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25]
-; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
- %2 = load <2 x double>, <2 x double> *%a2, align 8
- %3 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %2)
- %4 = or i32 %1, %3
- ret i32 %4
-}
-declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone
-
-define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_unpckhpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; GENERIC-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_unpckhpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; ATOM-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00]
-; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_unpckhpd:
-; SLM: # %bb.0:
-; SLM-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SLM-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [4:1.00]
-; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_unpckhpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_unpckhpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SANDY-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_unpckhpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_unpckhpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_unpckhpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_unpckhpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
-; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_unpckhpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_unpckhpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_unpckhpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_unpckhpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_unpckhpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50]
-; BDVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
-; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_unpckhpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50]
-; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
-; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_unpckhpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_unpckhpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
-; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_unpckhpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_unpckhpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
-; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = shufflevector <2 x double> %a1, <2 x double> %2, <2 x i32> <i32 1, i32 3>
- %4 = fadd <2 x double> %1, %3
- ret <2 x double> %4
-}
-
-define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_unpcklpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
-; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; GENERIC-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_unpcklpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
-; ATOM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [1:1.00]
-; ATOM-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
-; ATOM-NEXT: addpd %xmm2, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_unpcklpd:
-; SLM: # %bb.0:
-; SLM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
-; SLM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [4:1.00]
-; SLM-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
-; SLM-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_unpcklpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
-; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
-; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; SANDY-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_unpcklpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_unpcklpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
-; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; HASWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_unpcklpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00]
-; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_unpcklpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_unpcklpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00]
-; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00]
-; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_unpcklpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_unpcklpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_unpcklpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00]
-; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_unpcklpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00]
-; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00]
-; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_unpcklpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
-; BDVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:0.50]
-; BDVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [2:0.50]
-; BDVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_unpcklpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [2:0.50]
-; BDVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:0.50]
-; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_unpcklpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00]
-; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:0.50]
-; BTVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_unpcklpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:0.50]
-; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00]
-; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_unpcklpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_unpcklpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:0.50]
-; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = shufflevector <2 x double> %1, <2 x double> %2, <2 x i32> <i32 0, i32 2>
- %4 = fadd <2 x double> %1, %3
- ret <2 x double> %4
-}
-
-define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_xorpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_xorpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: xorpd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_xorpd:
-; SLM: # %bb.0:
-; SLM-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: xorpd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_xorpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_xorpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SANDY-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_xorpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_xorpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_xorpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_xorpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_xorpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_xorpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_xorpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_xorpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_xorpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_xorpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_xorpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_xorpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_xorpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_xorpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = bitcast <2 x double> %a0 to <4 x i32>
- %2 = bitcast <2 x double> %a1 to <4 x i32>
- %3 = xor <4 x i32> %1, %2
- %4 = load <2 x double>, <2 x double> *%a2, align 16
- %5 = bitcast <2 x double> %4 to <4 x i32>
- %6 = xor <4 x i32> %3, %5
- %7 = bitcast <4 x i32> %6 to <2 x double>
- %8 = fadd <2 x double> %a1, %7
- ret <2 x double> %8
-}
-
-!0 = !{i32 1}
diff --git a/llvm/test/CodeGen/X86/sse3-schedule.ll b/llvm/test/CodeGen/X86/sse3-schedule.ll
deleted file mode 100644
index c80d0e44659..00000000000
--- a/llvm/test/CodeGen/X86/sse3-schedule.ll
+++ /dev/null
@@ -1,1549 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
-
-define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_addsubpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_addsubpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: addsubpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: addsubpd (%rdi), %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_addsubpd:
-; SLM: # %bb.0:
-; SLM-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: addsubpd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_addsubpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_addsubpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_addsubpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_addsubpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_addsubpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_addsubpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_addsubpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_addsubpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_addsubpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_addsubpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_addsubpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_addsubpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_addsubpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_addsubpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_addsubpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_addsubpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %1, <2 x double> %2)
- ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone
-
-define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_addsubps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_addsubps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: addsubps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: addsubps (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_addsubps:
-; SLM: # %bb.0:
-; SLM-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: addsubps (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_addsubps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_addsubps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_addsubps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_addsubps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_addsubps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_addsubps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_addsubps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_addsubps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_addsubps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_addsubps:
-; SKX: # %bb.0:
-; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_addsubps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_addsubps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_addsubps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_addsubps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_addsubps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_addsubps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %1, <4 x float> %2)
- ret <4 x float> %3
-}
-declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone
-
-define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_haddpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
-; GENERIC-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_haddpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: haddpd %xmm1, %xmm0 # sched: [8:4.00]
-; ATOM-NEXT: haddpd (%rdi), %xmm0 # sched: [9:4.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_haddpd:
-; SLM: # %bb.0:
-; SLM-NEXT: haddpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: haddpd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_haddpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
-; SANDY-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_haddpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_haddpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
-; HASWELL-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_haddpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_haddpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
-; BROADWELL-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [10:2.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_haddpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; BROADWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_haddpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [6:2.00]
-; SKYLAKE-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [12:2.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_haddpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; SKYLAKE-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_haddpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [6:2.00]
-; SKX-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [12:2.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_haddpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; SKX-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_haddpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [11:1.00]
-; BDVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [16:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_haddpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
-; BDVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_haddpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [4:1.00]
-; BTVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [9:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_haddpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BTVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_haddpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_haddpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %1, <2 x double> %2)
- ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone
-
-define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_haddps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
-; GENERIC-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_haddps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: haddps %xmm1, %xmm0 # sched: [8:4.00]
-; ATOM-NEXT: haddps (%rdi), %xmm0 # sched: [9:4.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_haddps:
-; SLM: # %bb.0:
-; SLM-NEXT: haddps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: haddps (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_haddps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
-; SANDY-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_haddps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_haddps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
-; HASWELL-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_haddps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_haddps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
-; BROADWELL-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [10:2.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_haddps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; BROADWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_haddps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [6:2.00]
-; SKYLAKE-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [12:2.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_haddps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; SKYLAKE-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_haddps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [6:2.00]
-; SKX-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [12:2.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_haddps:
-; SKX: # %bb.0:
-; SKX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; SKX-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_haddps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [11:1.00]
-; BDVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [16:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_haddps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
-; BDVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_haddps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [4:1.00]
-; BTVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [9:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_haddps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BTVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_haddps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_haddps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %1, <4 x float> %2)
- ret <4 x float> %3
-}
-declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
-
-define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_hsubpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
-; GENERIC-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_hsubpd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: hsubpd %xmm1, %xmm0 # sched: [8:4.00]
-; ATOM-NEXT: hsubpd (%rdi), %xmm0 # sched: [9:4.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_hsubpd:
-; SLM: # %bb.0:
-; SLM-NEXT: hsubpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: hsubpd (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_hsubpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
-; SANDY-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_hsubpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_hsubpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
-; HASWELL-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_hsubpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_hsubpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
-; BROADWELL-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [10:2.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_hsubpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; BROADWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_hsubpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [6:2.00]
-; SKYLAKE-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [12:2.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_hsubpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; SKYLAKE-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_hsubpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [6:2.00]
-; SKX-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [12:2.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_hsubpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; SKX-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_hsubpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [11:1.00]
-; BDVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [16:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_hsubpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
-; BDVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_hsubpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [4:1.00]
-; BTVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [9:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_hsubpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BTVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_hsubpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_hsubpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %1, <2 x double> %2)
- ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone
-
-define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_hsubps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
-; GENERIC-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_hsubps:
-; ATOM: # %bb.0:
-; ATOM-NEXT: hsubps %xmm1, %xmm0 # sched: [8:4.00]
-; ATOM-NEXT: hsubps (%rdi), %xmm0 # sched: [9:4.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_hsubps:
-; SLM: # %bb.0:
-; SLM-NEXT: hsubps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: hsubps (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_hsubps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
-; SANDY-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_hsubps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_hsubps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
-; HASWELL-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_hsubps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_hsubps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
-; BROADWELL-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [10:2.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_hsubps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; BROADWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_hsubps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [6:2.00]
-; SKYLAKE-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [12:2.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_hsubps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; SKYLAKE-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_hsubps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [6:2.00]
-; SKX-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [12:2.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_hsubps:
-; SKX: # %bb.0:
-; SKX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00]
-; SKX-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_hsubps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [11:1.00]
-; BDVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [16:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_hsubps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [11:1.00]
-; BDVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_hsubps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [4:1.00]
-; BTVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [9:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_hsubps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BTVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_hsubps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_hsubps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %1, <4 x float> %2)
- ret <4 x float> %3
-}
-declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone
-
-define <16 x i8> @test_lddqu(i8* %a0) {
-; GENERIC-LABEL: test_lddqu:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_lddqu:
-; ATOM: # %bb.0:
-; ATOM-NEXT: lddqu (%rdi), %xmm0 # sched: [3:1.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_lddqu:
-; SLM: # %bb.0:
-; SLM-NEXT: lddqu (%rdi), %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_lddqu:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_lddqu:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_lddqu:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_lddqu:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_lddqu:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_lddqu:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_lddqu:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_lddqu:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_lddqu:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_lddqu:
-; SKX: # %bb.0:
-; SKX-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_lddqu:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_lddqu:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_lddqu:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_lddqu:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_lddqu:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_lddqu:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vlddqu (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
- ret <16 x i8> %1
-}
-declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly
-
-define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) {
-; GENERIC-LABEL: test_monitor:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl %esi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; GENERIC-NEXT: monitor # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_monitor:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl %esi, %ecx # sched: [1:0.50]
-; ATOM-NEXT: leaq (%rdi), %rax # sched: [1:1.00]
-; ATOM-NEXT: monitor # sched: [45:22.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_monitor:
-; SLM: # %bb.0:
-; SLM-NEXT: movl %esi, %ecx # sched: [1:0.50]
-; SLM-NEXT: leaq (%rdi), %rax # sched: [1:1.00]
-; SLM-NEXT: monitor # sched: [100:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_monitor:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movl %esi, %ecx # sched: [1:0.33]
-; SANDY-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; SANDY-SSE-NEXT: monitor # sched: [100:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_monitor:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl %esi, %ecx # sched: [1:0.33]
-; SANDY-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; SANDY-NEXT: monitor # sched: [100:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_monitor:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
-; HASWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; HASWELL-SSE-NEXT: monitor # sched: [100:0.25]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_monitor:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl %esi, %ecx # sched: [1:0.25]
-; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; HASWELL-NEXT: monitor # sched: [100:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_monitor:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: monitor # sched: [100:0.25]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_monitor:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl %esi, %ecx # sched: [1:0.25]
-; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; BROADWELL-NEXT: monitor # sched: [100:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_monitor:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: monitor # sched: [100:0.25]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_monitor:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl %esi, %ecx # sched: [1:0.25]
-; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; SKYLAKE-NEXT: monitor # sched: [100:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_monitor:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
-; SKX-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; SKX-SSE-NEXT: monitor # sched: [100:0.25]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_monitor:
-; SKX: # %bb.0:
-; SKX-NEXT: movl %esi, %ecx # sched: [1:0.25]
-; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; SKX-NEXT: monitor # sched: [100:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_monitor:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.50]
-; BDVER2-SSE-NEXT: monitor # sched: [100:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_monitor:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; BDVER2-NEXT: movl %esi, %ecx # sched: [1:0.50]
-; BDVER2-NEXT: monitor # sched: [100:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_monitor:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.50]
-; BTVER2-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: monitor # sched: [100:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_monitor:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50]
-; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
-; BTVER2-NEXT: monitor # sched: [100:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_monitor:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: monitor # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_monitor:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25]
-; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25]
-; ZNVER1-NEXT: monitor # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2)
- ret void
-}
-declare void @llvm.x86.sse3.monitor(i8*, i32, i32)
-
-define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_movddup:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
-; GENERIC-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
-; GENERIC-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movddup:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
-; ATOM-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [1:1.00]
-; ATOM-NEXT: subpd %xmm1, %xmm0 # sched: [6:3.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movddup:
-; SLM: # %bb.0:
-; SLM-NEXT: movddup {{.*#+}} xmm1 = mem[0,0] sched: [4:1.00]
-; SLM-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; SLM-NEXT: subpd %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movddup:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
-; SANDY-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
-; SANDY-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movddup:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
-; SANDY-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movddup:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
-; HASWELL-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
-; HASWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movddup:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
-; HASWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movddup:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
-; BROADWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movddup:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; BROADWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
-; BROADWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movddup:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
-; SKYLAKE-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movddup:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
-; SKYLAKE-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movddup:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
-; SKX-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50]
-; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movddup:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
-; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50]
-; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movddup:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [2:0.50]
-; BDVER2-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [7:0.50]
-; BDVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movddup:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [7:0.50]
-; BDVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [2:0.50]
-; BDVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movddup:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
-; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:1.00]
-; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movddup:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00]
-; BTVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
-; BTVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movddup:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movddup:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50]
-; ZNVER1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
-; ZNVER1-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
- %2 = load <2 x double>, <2 x double> *%a1, align 16
- %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
- %4 = fsub <2 x double> %3, %1 ; Use fsub to stop the movddup from being folded as a broadcast load in avx512vl.
- ret <2 x double> %4
-}
-
-define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_movshdup:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
-; GENERIC-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movshdup:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
-; ATOM-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [1:1.00]
-; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movshdup:
-; SLM: # %bb.0:
-; SLM-NEXT: movshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:1.00]
-; SLM-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movshdup:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
-; SANDY-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movshdup:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movshdup:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
-; HASWELL-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movshdup:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movshdup:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [5:0.50]
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movshdup:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [5:0.50]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movshdup:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movshdup:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movshdup:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
-; SKX-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movshdup:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
-; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movshdup:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [2:0.50]
-; BDVER2-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [7:0.50]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movshdup:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [7:0.50]
-; BDVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [2:0.50]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movshdup:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
-; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movshdup:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:1.00]
-; BTVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movshdup:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movshdup:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50]
-; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
- %2 = load <4 x float>, <4 x float> *%a1, align 16
- %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
- %4 = fadd <4 x float> %1, %3
- ret <4 x float> %4
-}
-
-define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_movsldup:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
-; GENERIC-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_movsldup:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
-; ATOM-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [1:1.00]
-; ATOM-NEXT: addps %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_movsldup:
-; SLM: # %bb.0:
-; SLM-NEXT: movsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:1.00]
-; SLM-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movsldup:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
-; SANDY-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movsldup:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movsldup:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
-; HASWELL-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movsldup:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movsldup:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [5:0.50]
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movsldup:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [5:0.50]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movsldup:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movsldup:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movsldup:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
-; SKX-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movsldup:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
-; SKX-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movsldup:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [2:0.50]
-; BDVER2-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [7:0.50]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movsldup:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [7:0.50]
-; BDVER2-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [2:0.50]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movsldup:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:0.50]
-; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movsldup:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:1.00]
-; BTVER2-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movsldup:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [100:0.25]
-; ZNVER1-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [100:0.25]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movsldup:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50]
-; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
- %2 = load <4 x float>, <4 x float> *%a1, align 16
- %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
- %4 = fadd <4 x float> %1, %3
- ret <4 x float> %4
-}
-
-define void @test_mwait(i32 %a0, i32 %a1) {
-; GENERIC-LABEL: test_mwait:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl %esi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: movl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: mwait # sched: [100:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_mwait:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl %esi, %eax # sched: [1:0.50]
-; ATOM-NEXT: movl %edi, %ecx # sched: [1:0.50]
-; ATOM-NEXT: mwait # sched: [46:23.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_mwait:
-; SLM: # %bb.0:
-; SLM-NEXT: movl %esi, %eax # sched: [1:0.50]
-; SLM-NEXT: movl %edi, %ecx # sched: [1:0.50]
-; SLM-NEXT: mwait # sched: [100:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_mwait:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movl %esi, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: movl %edi, %ecx # sched: [1:0.33]
-; SANDY-SSE-NEXT: mwait # sched: [100:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_mwait:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl %esi, %eax # sched: [1:0.33]
-; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33]
-; SANDY-NEXT: mwait # sched: [100:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_mwait:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
-; HASWELL-SSE-NEXT: mwait # sched: [20:2.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_mwait:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl %esi, %eax # sched: [1:0.25]
-; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25]
-; HASWELL-NEXT: mwait # sched: [20:2.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_mwait:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: mwait # sched: [100:0.25]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mwait:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl %esi, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25]
-; BROADWELL-NEXT: mwait # sched: [100:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_mwait:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: mwait # sched: [20:2.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mwait:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl %esi, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25]
-; SKYLAKE-NEXT: mwait # sched: [20:2.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_mwait:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
-; SKX-SSE-NEXT: mwait # sched: [20:2.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_mwait:
-; SKX: # %bb.0:
-; SKX-NEXT: movl %esi, %eax # sched: [1:0.25]
-; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25]
-; SKX-NEXT: mwait # sched: [20:2.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_mwait:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.50]
-; BDVER2-SSE-NEXT: mwait # sched: [100:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_mwait:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl %esi, %eax # sched: [1:0.50]
-; BDVER2-NEXT: movl %edi, %ecx # sched: [1:0.50]
-; BDVER2-NEXT: mwait # sched: [100:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_mwait:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.50]
-; BTVER2-SSE-NEXT: mwait # sched: [100:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_mwait:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl %esi, %eax # sched: [1:0.50]
-; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.50]
-; BTVER2-NEXT: mwait # sched: [100:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_mwait:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movl %esi, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: mwait # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_mwait:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25]
-; ZNVER1-NEXT: mwait # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1)
- ret void
-}
-declare void @llvm.x86.sse3.mwait(i32, i32)
diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll
deleted file mode 100644
index c5a1c775f38..00000000000
--- a/llvm/test/CodeGen/X86/sse41-schedule.ll
+++ /dev/null
@@ -1,6248 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2,-xop | FileCheck %s --check-prefixes=CHECK,BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
-
-define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_blendpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50]
-; GENERIC-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_blendpd:
-; SLM: # %bb.0:
-; SLM-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:1.00]
-; SLM-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [4:1.00]
-; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_blendpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50]
-; SANDY-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
-; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_blendpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50]
-; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
-; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_blendpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.33]
-; HASWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
-; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_blendpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.33]
-; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
-; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_blendpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.33]
-; BROADWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50]
-; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blendpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.33]
-; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50]
-; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_blendpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blendpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.33]
-; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
-; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_blendpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.33]
-; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_blendpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.33]
-; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
-; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_blendpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [2:0.50]
-; BDVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
-; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_blendpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [2:0.50]
-; BDVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
-; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_blendpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50]
-; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_blendpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50]
-; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00]
-; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_blendpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_blendpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50]
-; ZNVER1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50]
-; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 3>
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = shufflevector <2 x double> %1, <2 x double> %2, <2 x i32> <i32 0, i32 3>
- %4 = fadd <2 x double> %1, %3
- ret <2 x double> %4
-}
-
-define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_blendps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
-; GENERIC-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_blendps:
-; SLM: # %bb.0:
-; SLM-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00]
-; SLM-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [4:1.00]
-; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_blendps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
-; SANDY-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_blendps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
-; SANDY-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_blendps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
-; HASWELL-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_blendps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
-; HASWELL-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_blendps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
-; BROADWELL-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:0.50]
-; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blendps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
-; BROADWELL-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:0.50]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_blendps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blendps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
-; SKYLAKE-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_blendps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
-; SKX-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_blendps:
-; SKX: # %bb.0:
-; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
-; SKX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_blendps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [2:0.50]
-; BDVER2-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_blendps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [2:0.50]
-; BDVER2-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_blendps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
-; BTVER2-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_blendps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
-; BTVER2-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:1.00]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_blendps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_blendps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
-; ZNVER1-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [8:0.50]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = shufflevector <4 x float> %a1, <4 x float> %2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
- %4 = fadd <4 x float> %1, %3
- ret <4 x float> %4
-}
-
-define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) {
-; GENERIC-LABEL: test_blendvpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00]
-; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
-; GENERIC-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
-; GENERIC-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_blendvpd:
-; SLM: # %bb.0:
-; SLM-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.50]
-; SLM-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [1:1.00]
-; SLM-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [4:1.00]
-; SLM-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_blendvpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00]
-; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
-; SANDY-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
-; SANDY-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_blendvpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_blendvpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
-; HASWELL-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
-; HASWELL-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_blendvpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_blendvpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
-; BROADWELL-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
-; BROADWELL-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blendvpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; BROADWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_blendvpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
-; SKYLAKE-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
-; SKYLAKE-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blendvpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
-; SKYLAKE-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_blendvpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.33]
-; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
-; SKX-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
-; SKX-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_blendvpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
-; SKX-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_blendvpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.50]
-; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50]
-; BDVER2-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
-; BDVER2-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
-; BDVER2-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_blendvpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; BDVER2-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_blendvpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
-; BTVER2-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
-; BTVER2-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_blendvpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; BTVER2-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_blendvpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_blendvpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
- %2 = load <2 x double>, <2 x double> *%a3, align 16
- %3 = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %1, <2 x double> %2, <2 x double> %a2)
- ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-
-define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) {
-; GENERIC-LABEL: test_blendvps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
-; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
-; GENERIC-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
-; GENERIC-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_blendvps:
-; SLM: # %bb.0:
-; SLM-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50]
-; SLM-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [1:1.00]
-; SLM-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [4:1.00]
-; SLM-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_blendvps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
-; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
-; SANDY-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
-; SANDY-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_blendvps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_blendvps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
-; HASWELL-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
-; HASWELL-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_blendvps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_blendvps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
-; BROADWELL-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
-; BROADWELL-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_blendvps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; BROADWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_blendvps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
-; SKYLAKE-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
-; SKYLAKE-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_blendvps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
-; SKYLAKE-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_blendvps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.33]
-; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
-; SKX-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
-; SKX-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_blendvps:
-; SKX: # %bb.0:
-; SKX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
-; SKX-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_blendvps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50]
-; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50]
-; BDVER2-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
-; BDVER2-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
-; BDVER2-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_blendvps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; BDVER2-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_blendvps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
-; BTVER2-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
-; BTVER2-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_blendvps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; BTVER2-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_blendvps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_blendvps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
- %2 = load <4 x float>, <4 x float> *%a3
- %3 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %1, <4 x float> %2, <4 x float> %a2)
- ret <4 x float> %3
-}
-declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-
-define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_dppd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_dppd:
-; SLM: # %bb.0:
-; SLM-NEXT: dppd $7, %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: dppd $7, (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_dppd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_dppd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_dppd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
-; HASWELL-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_dppd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
-; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_dppd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
-; BROADWELL-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [14:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_dppd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
-; BROADWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_dppd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
-; SKYLAKE-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_dppd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
-; SKYLAKE-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_dppd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
-; SKX-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_dppd:
-; SKX: # %bb.0:
-; SKX-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
-; SKX-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_dppd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [15:1.50]
-; BDVER2-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [20:1.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_dppd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [15:1.50]
-; BDVER2-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [20:1.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_dppd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:3.00]
-; BTVER2-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [14:3.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_dppd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:3.00]
-; BTVER2-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:3.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_dppd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_dppd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7)
- %2 = load <2 x double>, <2 x double> *%a2, align 16
- %3 = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %1, <2 x double> %2, i8 7)
- ret <2 x double> %3
-}
-declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone
-
-define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_dpps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: dpps $7, %xmm1, %xmm0 # sched: [12:2.00]
-; GENERIC-NEXT: dpps $7, (%rdi), %xmm0 # sched: [18:2.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_dpps:
-; SLM: # %bb.0:
-; SLM-NEXT: dpps $7, %xmm1, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: dpps $7, (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_dpps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [12:2.00]
-; SANDY-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [18:2.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_dpps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00]
-; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [18:2.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_dpps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [14:2.00]
-; HASWELL-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [20:2.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_dpps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
-; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [20:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_dpps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [14:2.00]
-; BROADWELL-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:2.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_dpps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
-; BROADWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_dpps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [13:1.50]
-; SKYLAKE-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:1.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_dpps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.50]
-; SKYLAKE-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_dpps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [13:1.33]
-; SKX-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:1.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_dpps:
-; SKX: # %bb.0:
-; SKX-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.33]
-; SKX-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_dpps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [25:1.50]
-; BDVER2-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [30:1.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_dpps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [25:1.50]
-; BDVER2-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [30:1.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_dpps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [11:3.00]
-; BTVER2-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [16:3.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_dpps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
-; BTVER2-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [16:3.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_dpps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_dpps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7)
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %1, <4 x float> %2, i8 7)
- ret <4 x float> %3
-}
-declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone
-
-define i32 @test_extractps(<4 x float> %a0, i32 *%a1) {
-; GENERIC-LABEL: test_extractps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
-; GENERIC-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_extractps:
-; SLM: # %bb.0:
-; SLM-NEXT: extractps $3, %xmm0, %eax # sched: [1:1.00]
-; SLM-NEXT: extractps $1, %xmm0, (%rdi) # sched: [4:2.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_extractps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
-; SANDY-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_extractps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00]
-; SANDY-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_extractps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:1.00]
-; HASWELL-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_extractps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00]
-; HASWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_extractps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_extractps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00]
-; BROADWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_extractps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_extractps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_extractps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
-; SKX-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_extractps:
-; SKX: # %bb.0:
-; SKX-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00]
-; SKX-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_extractps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [13:1.00]
-; BDVER2-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [13:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_extractps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vextractps $3, %xmm0, %eax # sched: [13:1.00]
-; BDVER2-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [13:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_extractps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
-; BTVER2-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_extractps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00]
-; BTVER2-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_extractps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:2.00]
-; ZNVER1-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:2.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_extractps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vextractps $3, %xmm0, %eax # sched: [2:2.00]
-; ZNVER1-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [5:2.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = extractelement <4 x float> %a0, i32 3
- %2 = extractelement <4 x float> %a0, i32 1
- %3 = bitcast float %1 to i32
- %4 = bitcast float %2 to i32
- store i32 %4, i32 *%a1
- ret i32 %3
-}
-
-define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2) {
-; GENERIC-LABEL: test_insertps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
-; GENERIC-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_insertps:
-; SLM: # %bb.0:
-; SLM-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
-; SLM-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_insertps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
-; SANDY-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_insertps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
-; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_insertps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
-; HASWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_insertps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
-; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_insertps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_insertps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
-; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_insertps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_insertps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
-; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_insertps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
-; SKX-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_insertps:
-; SKX: # %bb.0:
-; SKX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
-; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_insertps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [2:0.50]
-; BDVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_insertps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [2:0.50]
-; BDVER2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_insertps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
-; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_insertps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
-; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_insertps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_insertps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
-; ZNVER1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 17)
- %2 = load float, float *%a2
- %3 = insertelement <4 x float> %1, float %2, i32 3
- ret <4 x float> %3
-}
-declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
-
-define <2 x i64> @test_movntdqa(i8* %a0) {
-; GENERIC-LABEL: test_movntdqa:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_movntdqa:
-; SLM: # %bb.0:
-; SLM-NEXT: movntdqa (%rdi), %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_movntdqa:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_movntdqa:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_movntdqa:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_movntdqa:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_movntdqa:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_movntdqa:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_movntdqa:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_movntdqa:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_movntdqa:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_movntdqa:
-; SKX: # %bb.0:
-; SKX-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_movntdqa:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_movntdqa:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_movntdqa:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_movntdqa:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_movntdqa:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_movntdqa:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x i64> @llvm.x86.sse41.movntdqa(i8* %a0)
- ret <2 x i64> %1
-}
-declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readnone
-
-define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_mpsadbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_mpsadbw:
-; SLM: # %bb.0:
-; SLM-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00]
-; SLM-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_mpsadbw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00]
-; SANDY-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_mpsadbw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
-; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_mpsadbw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00]
-; HASWELL-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:2.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_mpsadbw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
-; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_mpsadbw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00]
-; BROADWELL-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [12:2.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_mpsadbw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
-; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_mpsadbw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00]
-; SKYLAKE-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_mpsadbw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
-; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_mpsadbw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00]
-; SKX-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_mpsadbw:
-; SKX: # %bb.0:
-; SKX-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
-; SKX-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_mpsadbw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [9:2.00]
-; BDVER2-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [14:2.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_mpsadbw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [9:2.00]
-; BDVER2-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [14:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_mpsadbw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [3:2.00]
-; BTVER2-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [8:2.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_mpsadbw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BTVER2-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_mpsadbw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_mpsadbw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7)
- %2 = bitcast <8 x i16> %1 to <16 x i8>
- %3 = load <16 x i8>, <16 x i8> *%a2, align 16
- %4 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %2, <16 x i8> %3, i8 7)
- ret <8 x i16> %4
-}
-declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone
-
-define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_packusdw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_packusdw:
-; SLM: # %bb.0:
-; SLM-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: packusdw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_packusdw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_packusdw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_packusdw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_packusdw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_packusdw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_packusdw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_packusdw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_packusdw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_packusdw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00]
-; SKX-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_packusdw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_packusdw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_packusdw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_packusdw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_packusdw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_packusdw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_packusdw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
- %2 = bitcast <8 x i16> %1 to <4 x i32>
- %3 = load <4 x i32>, <4 x i32> *%a2, align 16
- %4 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %2, <4 x i32> %3)
- ret <8 x i16> %4
-}
-declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 x i8> *%a3) {
-; GENERIC-LABEL: test_pblendvb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
-; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
-; GENERIC-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
-; GENERIC-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pblendvb:
-; SLM: # %bb.0:
-; SLM-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.50]
-; SLM-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [1:1.00]
-; SLM-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [4:1.00]
-; SLM-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pblendvb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
-; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
-; SANDY-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
-; SANDY-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
-; SANDY-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pblendvb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pblendvb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
-; HASWELL-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
-; HASWELL-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pblendvb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pblendvb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
-; BROADWELL-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
-; BROADWELL-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pblendvb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; BROADWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pblendvb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
-; SKYLAKE-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
-; SKYLAKE-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pblendvb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
-; SKYLAKE-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pblendvb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
-; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
-; SKX-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
-; SKX-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pblendvb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
-; SKX-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pblendvb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50]
-; BDVER2-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
-; BDVER2-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
-; BDVER2-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pblendvb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; BDVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pblendvb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
-; BTVER2-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
-; BTVER2-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pblendvb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; BTVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pblendvb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pblendvb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2)
- %2 = load <16 x i8>, <16 x i8> *%a3, align 16
- %3 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %1, <16 x i8> %2, <16 x i8> %a2)
- ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
-
-define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pblendw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
-; GENERIC-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
-; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pblendw:
-; SLM: # %bb.0:
-; SLM-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
-; SLM-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [4:1.00]
-; SLM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pblendw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
-; SANDY-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
-; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pblendw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
-; SANDY-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
-; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pblendw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
-; HASWELL-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
-; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pblendw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
-; HASWELL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
-; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pblendw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pblendw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
-; BROADWELL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00]
-; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pblendw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pblendw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
-; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
-; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pblendw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
-; SKX-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
-; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pblendw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
-; SKX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
-; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pblendw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [2:0.50]
-; BDVER2-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pblendw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [2:0.50]
-; BDVER2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
-; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pblendw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
-; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pblendw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
-; BTVER2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00]
-; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pblendw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33]
-; ZNVER1-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pblendw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33]
-; ZNVER1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [8:0.50]
-; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = shufflevector <8 x i16> %a1, <8 x i16> %2, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 6, i32 15>
- %4 = add <8 x i16> %1, %3
- ret <8 x i16> %4
-}
-
-define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_pcmpeqq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pcmpeqq:
-; SLM: # %bb.0:
-; SLM-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpeqq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpeqq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpeqq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpeqq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpeqq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpeqq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpeqq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpeqq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpeqq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpeqq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpeqq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpeqq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpeqq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpeqq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpeqq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpeqq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = icmp eq <2 x i64> %a0, %a1
- %2 = sext <2 x i1> %1 to <2 x i64>
- %3 = load <2 x i64>, <2 x i64>*%a2, align 16
- %4 = icmp eq <2 x i64> %2, %3
- %5 = sext <2 x i1> %4 to <2 x i64>
- ret <2 x i64> %5
-}
-
-define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) {
-; GENERIC-LABEL: test_pextrb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
-; GENERIC-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pextrb:
-; SLM: # %bb.0:
-; SLM-NEXT: pextrb $3, %xmm0, %eax # sched: [1:1.00]
-; SLM-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [4:2.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pextrb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
-; SANDY-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pextrb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
-; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pextrb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:1.00]
-; HASWELL-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pextrb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00]
-; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pextrb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pextrb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00]
-; BROADWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pextrb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pextrb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pextrb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
-; SKX-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pextrb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
-; SKX-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pextrb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [13:1.00]
-; BDVER2-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [13:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pextrb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [13:1.00]
-; BDVER2-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [13:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pextrb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
-; BTVER2-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pextrb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
-; BTVER2-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pextrb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:2.00]
-; ZNVER1-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:3.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pextrb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:2.00]
-; ZNVER1-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:3.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = extractelement <16 x i8> %a0, i32 3
- %2 = extractelement <16 x i8> %a0, i32 1
- store i8 %2, i8 *%a1
- %3 = zext i8 %1 to i32
- ret i32 %3
-}
-
-define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
-; GENERIC-LABEL: test_pextrd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
-; GENERIC-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pextrd:
-; SLM: # %bb.0:
-; SLM-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pextrd $3, %xmm0, %eax # sched: [1:1.00]
-; SLM-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [4:2.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pextrd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
-; SANDY-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pextrd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
-; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pextrd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:1.00]
-; HASWELL-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pextrd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00]
-; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pextrd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pextrd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:1.00]
-; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pextrd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pextrd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pextrd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
-; SKX-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pextrd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
-; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pextrd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [13:1.00]
-; BDVER2-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [13:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pextrd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [13:1.00]
-; BDVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [13:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pextrd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
-; BTVER2-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pextrd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
-; BTVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pextrd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:2.00]
-; ZNVER1-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:3.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pextrd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpextrd $3, %xmm0, %eax # sched: [2:2.00]
-; ZNVER1-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:3.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = add <4 x i32> %a0, %a0
- %2 = extractelement <4 x i32> %1, i32 3
- %3 = extractelement <4 x i32> %1, i32 1
- store i32 %3, i32 *%a1
- ret i32 %2
-}
-
-define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) {
-; GENERIC-LABEL: test_pextrq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
-; GENERIC-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pextrq:
-; SLM: # %bb.0:
-; SLM-NEXT: pextrq $1, %xmm0, %rax # sched: [1:1.00]
-; SLM-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [4:2.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pextrq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
-; SANDY-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pextrq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
-; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pextrq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:1.00]
-; HASWELL-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pextrq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00]
-; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pextrq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pextrq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00]
-; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pextrq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pextrq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
-; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pextrq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
-; SKX-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pextrq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
-; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pextrq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [13:1.00]
-; BDVER2-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [13:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pextrq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [13:1.00]
-; BDVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [13:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pextrq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
-; BTVER2-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pextrq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
-; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pextrq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:2.00]
-; ZNVER1-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:3.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pextrq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:2.00]
-; ZNVER1-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:3.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = extractelement <2 x i64> %a0, i32 1
- %2 = extractelement <2 x i64> %a0, i32 1
- store i64 %2, i64 *%a2
- ret i64 %1
-}
-
-define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) {
-; GENERIC-LABEL: test_pextrw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
-; GENERIC-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pextrw:
-; SLM: # %bb.0:
-; SLM-NEXT: pextrw $3, %xmm0, %eax # sched: [1:1.00]
-; SLM-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [4:2.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pextrw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
-; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pextrw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
-; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pextrw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:1.00]
-; HASWELL-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pextrw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00]
-; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pextrw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pextrw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00]
-; BROADWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pextrw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pextrw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pextrw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
-; SKX-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pextrw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
-; SKX-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pextrw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [13:1.00]
-; BDVER2-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [13:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pextrw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpextrw $3, %xmm0, %eax # sched: [13:1.00]
-; BDVER2-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [13:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pextrw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
-; BTVER2-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pextrw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
-; BTVER2-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pextrw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:2.00]
-; ZNVER1-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:3.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pextrw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:2.00]
-; ZNVER1-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:3.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = extractelement <8 x i16> %a0, i32 3
- %2 = extractelement <8 x i16> %a0, i32 1
- store i16 %2, i16 *%a1
- %3 = zext i16 %1 to i32
- ret i32 %3
-}
-
-define <8 x i16> @test_phminposuw(<8 x i16> *%a0) {
-; GENERIC-LABEL: test_phminposuw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_phminposuw:
-; SLM: # %bb.0:
-; SLM-NEXT: phminposuw (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_phminposuw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_phminposuw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_phminposuw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_phminposuw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_phminposuw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phminposuw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_phminposuw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phminposuw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_phminposuw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:1.00]
-; SKX-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phminposuw:
-; SKX: # %bb.0:
-; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:1.00]
-; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_phminposuw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_phminposuw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_phminposuw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_phminposuw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: vphminposuw %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_phminposuw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_phminposuw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <8 x i16>, <8 x i16> *%a0, align 16
- %2 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %1)
- %3 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone
-
-define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) {
-; GENERIC-LABEL: test_pinsrb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pinsrb:
-; SLM: # %bb.0:
-; SLM-NEXT: pinsrb $1, %edi, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pinsrb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pinsrb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pinsrb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
-; HASWELL-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pinsrb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
-; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pinsrb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
-; BROADWELL-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pinsrb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
-; BROADWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pinsrb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
-; SKYLAKE-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pinsrb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
-; SKYLAKE-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pinsrb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
-; SKX-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pinsrb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
-; SKX-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pinsrb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [12:0.50]
-; BDVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pinsrb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [12:0.50]
-; BDVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pinsrb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [7:0.50]
-; BTVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [4:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pinsrb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [7:0.50]
-; BTVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pinsrb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pinsrb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = insertelement <16 x i8> %a0, i8 %a1, i32 1
- %2 = load i8, i8 *%a2
- %3 = insertelement <16 x i8> %1, i8 %2, i32 3
- ret <16 x i8> %3
-}
-
-define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: test_pinsrd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pinsrd:
-; SLM: # %bb.0:
-; SLM-NEXT: pinsrd $1, %edi, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pinsrd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pinsrd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pinsrd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
-; HASWELL-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pinsrd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
-; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pinsrd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
-; BROADWELL-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pinsrd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
-; BROADWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pinsrd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
-; SKYLAKE-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pinsrd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
-; SKYLAKE-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pinsrd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
-; SKX-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pinsrd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
-; SKX-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pinsrd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [12:0.50]
-; BDVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pinsrd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [12:0.50]
-; BDVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pinsrd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [7:0.50]
-; BTVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [4:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pinsrd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [7:0.50]
-; BTVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pinsrd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pinsrd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = insertelement <4 x i32> %a0, i32 %a1, i32 1
- %2 = load i32, i32 *%a2
- %3 = insertelement <4 x i32> %1, i32 %2, i32 3
- ret <4 x i32> %3
-}
-
-define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
-; GENERIC-LABEL: test_pinsrq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50]
-; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pinsrq:
-; SLM: # %bb.0:
-; SLM-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [4:1.00]
-; SLM-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pinsrq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50]
-; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pinsrq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50]
-; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pinsrq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
-; HASWELL-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
-; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pinsrq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
-; HASWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pinsrq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
-; BROADWELL-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pinsrq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
-; BROADWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pinsrq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
-; SKYLAKE-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pinsrq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
-; SKYLAKE-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pinsrq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
-; SKX-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
-; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pinsrq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
-; SKX-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pinsrq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:0.50]
-; BDVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [12:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pinsrq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:0.50]
-; BDVER2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [12:0.50]
-; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pinsrq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [4:1.00]
-; BTVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [7:0.50]
-; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pinsrq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [4:1.00]
-; BTVER2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [7:0.50]
-; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pinsrq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pinsrq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = insertelement <2 x i64> %a0, i64 %a2, i32 1
- %2 = load i64, i64 *%a3
- %3 = insertelement <2 x i64> %a1, i64 %2, i32 1
- %4 = add <2 x i64> %1, %3
- ret <2 x i64> %4
-}
-
-define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pmaxsb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmaxsb:
-; SLM: # %bb.0:
-; SLM-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pmaxsb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmaxsb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmaxsb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmaxsb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmaxsb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmaxsb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaxsb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmaxsb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaxsb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmaxsb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaxsb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmaxsb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmaxsb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmaxsb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmaxsb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmaxsb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmaxsb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1)
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %1, <16 x i8> %2)
- ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_pmaxsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmaxsd:
-; SLM: # %bb.0:
-; SLM-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pmaxsd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmaxsd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmaxsd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmaxsd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmaxsd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmaxsd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaxsd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmaxsd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaxsd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmaxsd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaxsd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmaxsd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmaxsd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmaxsd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmaxsd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmaxsd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmaxsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %1, <4 x i32> %2)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_pmaxud:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmaxud:
-; SLM: # %bb.0:
-; SLM-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pmaxud (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmaxud:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmaxud:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmaxud:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmaxud:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmaxud:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaxud:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmaxud:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaxud:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmaxud:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaxud:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmaxud:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmaxud:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmaxud:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmaxud:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmaxud:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmaxud:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %1, <4 x i32> %2)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pmaxuw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmaxuw:
-; SLM: # %bb.0:
-; SLM-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pmaxuw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmaxuw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmaxuw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmaxuw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmaxuw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmaxuw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaxuw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmaxuw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaxuw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmaxuw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaxuw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmaxuw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmaxuw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmaxuw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmaxuw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmaxuw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmaxuw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %1, <8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pminsb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pminsb:
-; SLM: # %bb.0:
-; SLM-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pminsb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pminsb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pminsb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pminsb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pminsb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pminsb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pminsb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pminsb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pminsb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pminsb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pminsb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pminsb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pminsb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pminsb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pminsb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pminsb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pminsb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1)
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %1, <16 x i8> %2)
- ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_pminsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pminsd:
-; SLM: # %bb.0:
-; SLM-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pminsd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pminsd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pminsd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pminsd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pminsd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pminsd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pminsd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pminsd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pminsd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pminsd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pminsd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pminsd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pminsd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pminsd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pminsd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pminsd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pminsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %1, <4 x i32> %2)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_pminud:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pminud:
-; SLM: # %bb.0:
-; SLM-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pminud (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pminud:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pminud:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pminud:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pminud:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pminud:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pminud:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pminud:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pminud:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pminud:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pminud:
-; SKX: # %bb.0:
-; SKX-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pminud:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pminud:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pminud:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pminud:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pminud:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pminud:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %1, <4 x i32> %2)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pminuw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pminuw:
-; SLM: # %bb.0:
-; SLM-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pminuw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pminuw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pminuw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pminuw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pminuw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pminuw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pminuw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pminuw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pminuw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pminuw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pminuw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pminuw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pminuw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pminuw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pminuw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pminuw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pminuw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %1, <8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
-; GENERIC-LABEL: test_pmovsxbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmovsxbw:
-; SLM: # %bb.0:
-; SLM-NEXT: pmovsxbw (%rdi), %xmm1 # sched: [4:1.00]
-; SLM-NEXT: pmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmovsxbw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmovsxbw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50]
-; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmovsxbw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
-; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmovsxbw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
-; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmovsxbw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovsxbw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
-; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmovsxbw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovsxbw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
-; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmovsxbw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
-; SKX-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
-; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovsxbw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
-; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmovsxbw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmovsxbw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50]
-; BDVER2-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmovsxbw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmovsxbw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
-; BTVER2-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmovsxbw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmovsxbw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %2 = sext <8 x i8> %1 to <8 x i16>
- %3 = load <8 x i8>, <8 x i8>* %a1, align 1
- %4 = sext <8 x i8> %3 to <8 x i16>
- %5 = add <8 x i16> %2, %4
- ret <8 x i16> %5
-}
-
-define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
-; GENERIC-LABEL: test_pmovsxbd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmovsxbd:
-; SLM: # %bb.0:
-; SLM-NEXT: pmovsxbd (%rdi), %xmm1 # sched: [4:1.00]
-; SLM-NEXT: pmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmovsxbd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmovsxbd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50]
-; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmovsxbd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
-; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmovsxbd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
-; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmovsxbd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovsxbd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
-; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmovsxbd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovsxbd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
-; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmovsxbd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
-; SKX-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
-; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovsxbd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmovsxbd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmovsxbd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50]
-; BDVER2-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmovsxbd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmovsxbd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
-; BTVER2-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmovsxbd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmovsxbd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = sext <4 x i8> %1 to <4 x i32>
- %3 = load <4 x i8>, <4 x i8>* %a1, align 1
- %4 = sext <4 x i8> %3 to <4 x i32>
- %5 = add <4 x i32> %2, %4
- ret <4 x i32> %5
-}
-
-define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
-; GENERIC-LABEL: test_pmovsxbq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmovsxbq:
-; SLM: # %bb.0:
-; SLM-NEXT: pmovsxbq (%rdi), %xmm1 # sched: [4:1.00]
-; SLM-NEXT: pmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmovsxbq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmovsxbq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50]
-; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmovsxbq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
-; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmovsxbq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmovsxbq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovsxbq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmovsxbq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovsxbq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmovsxbq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
-; SKX-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
-; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovsxbq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmovsxbq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmovsxbq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50]
-; BDVER2-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmovsxbq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmovsxbq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
-; BTVER2-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmovsxbq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmovsxbq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
- %2 = sext <2 x i8> %1 to <2 x i64>
- %3 = load <2 x i8>, <2 x i8>* %a1, align 1
- %4 = sext <2 x i8> %3 to <2 x i64>
- %5 = add <2 x i64> %2, %4
- ret <2 x i64> %5
-}
-
-define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
-; GENERIC-LABEL: test_pmovsxdq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmovsxdq:
-; SLM: # %bb.0:
-; SLM-NEXT: pmovsxdq (%rdi), %xmm1 # sched: [4:1.00]
-; SLM-NEXT: pmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmovsxdq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmovsxdq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50]
-; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmovsxdq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
-; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmovsxdq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmovsxdq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovsxdq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmovsxdq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovsxdq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmovsxdq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
-; SKX-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
-; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovsxdq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmovsxdq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmovsxdq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50]
-; BDVER2-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmovsxdq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmovsxdq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
-; BTVER2-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmovsxdq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmovsxdq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
- %2 = sext <2 x i32> %1 to <2 x i64>
- %3 = load <2 x i32>, <2 x i32>* %a1, align 1
- %4 = sext <2 x i32> %3 to <2 x i64>
- %5 = add <2 x i64> %2, %4
- ret <2 x i64> %5
-}
-
-define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
-; GENERIC-LABEL: test_pmovsxwd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmovsxwd:
-; SLM: # %bb.0:
-; SLM-NEXT: pmovsxwd (%rdi), %xmm1 # sched: [4:1.00]
-; SLM-NEXT: pmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmovsxwd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmovsxwd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50]
-; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmovsxwd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
-; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmovsxwd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
-; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmovsxwd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovsxwd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
-; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmovsxwd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovsxwd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
-; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmovsxwd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
-; SKX-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
-; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovsxwd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmovsxwd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmovsxwd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50]
-; BDVER2-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmovsxwd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmovsxwd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
-; BTVER2-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmovsxwd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmovsxwd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = sext <4 x i16> %1 to <4 x i32>
- %3 = load <4 x i16>, <4 x i16>* %a1, align 1
- %4 = sext <4 x i16> %3 to <4 x i32>
- %5 = add <4 x i32> %2, %4
- ret <4 x i32> %5
-}
-
-define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) {
-; GENERIC-LABEL: test_pmovsxwq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmovsxwq:
-; SLM: # %bb.0:
-; SLM-NEXT: pmovsxwq (%rdi), %xmm1 # sched: [4:1.00]
-; SLM-NEXT: pmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmovsxwq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmovsxwq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50]
-; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmovsxwq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
-; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmovsxwq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmovsxwq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovsxwq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmovsxwq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovsxwq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmovsxwq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
-; SKX-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
-; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovsxwq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmovsxwq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmovsxwq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50]
-; BDVER2-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmovsxwq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmovsxwq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
-; BTVER2-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmovsxwq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmovsxwq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
- %2 = sext <2 x i16> %1 to <2 x i64>
- %3 = load <2 x i16>, <2 x i16>* %a1, align 1
- %4 = sext <2 x i16> %3 to <2 x i64>
- %5 = add <2 x i64> %2, %4
- ret <2 x i64> %5
-}
-
-define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) {
-; GENERIC-LABEL: test_pmovzxbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
-; GENERIC-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
-; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmovzxbw:
-; SLM: # %bb.0:
-; SLM-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [4:1.00]
-; SLM-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmovzxbw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
-; SANDY-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
-; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmovzxbw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
-; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
-; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmovzxbw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; HASWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
-; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmovzxbw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
-; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmovzxbw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovzxbw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
-; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmovzxbw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovzxbw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
-; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmovzxbw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; SKX-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
-; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovzxbw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
-; SKX-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
-; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmovzxbw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmovzxbw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
-; BDVER2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [2:0.50]
-; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmovzxbw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmovzxbw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
-; BTVER2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
-; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmovzxbw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmovzxbw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
-; ZNVER1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25]
-; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %2 = zext <8 x i8> %1 to <8 x i16>
- %3 = load <8 x i8>, <8 x i8>* %a1, align 1
- %4 = zext <8 x i8> %3 to <8 x i16>
- %5 = add <8 x i16> %2, %4
- ret <8 x i16> %5
-}
-
-define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) {
-; GENERIC-LABEL: test_pmovzxbd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
-; GENERIC-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
-; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmovzxbd:
-; SLM: # %bb.0:
-; SLM-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [4:1.00]
-; SLM-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
-; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmovzxbd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
-; SANDY-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
-; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmovzxbd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
-; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
-; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmovzxbd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
-; HASWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
-; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmovzxbd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
-; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
-; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmovzxbd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovzxbd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
-; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
-; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmovzxbd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovzxbd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
-; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
-; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmovzxbd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
-; SKX-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
-; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovzxbd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
-; SKX-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmovzxbd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmovzxbd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
-; BDVER2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [2:0.50]
-; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmovzxbd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmovzxbd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
-; BTVER2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
-; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmovzxbd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmovzxbd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
-; ZNVER1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25]
-; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = zext <4 x i8> %1 to <4 x i32>
- %3 = load <4 x i8>, <4 x i8>* %a1, align 1
- %4 = zext <4 x i8> %3 to <4 x i32>
- %5 = add <4 x i32> %2, %4
- ret <4 x i32> %5
-}
-
-define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) {
-; GENERIC-LABEL: test_pmovzxbq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
-; GENERIC-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
-; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmovzxbq:
-; SLM: # %bb.0:
-; SLM-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [4:1.00]
-; SLM-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
-; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmovzxbq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
-; SANDY-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
-; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmovzxbq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
-; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
-; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmovzxbq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
-; HASWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
-; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmovzxbq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
-; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmovzxbq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovzxbq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
-; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmovzxbq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovzxbq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
-; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmovzxbq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
-; SKX-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
-; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovzxbq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
-; SKX-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmovzxbq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmovzxbq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
-; BDVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [2:0.50]
-; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmovzxbq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmovzxbq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
-; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
-; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmovzxbq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmovzxbq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
-; ZNVER1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.25]
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
- %2 = zext <2 x i8> %1 to <2 x i64>
- %3 = load <2 x i8>, <2 x i8>* %a1, align 1
- %4 = zext <2 x i8> %3 to <2 x i64>
- %5 = add <2 x i64> %2, %4
- ret <2 x i64> %5
-}
-
-define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) {
-; GENERIC-LABEL: test_pmovzxdq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
-; GENERIC-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50]
-; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmovzxdq:
-; SLM: # %bb.0:
-; SLM-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [4:1.00]
-; SLM-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
-; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmovzxdq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
-; SANDY-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50]
-; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmovzxdq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
-; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50]
-; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmovzxdq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
-; HASWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
-; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmovzxdq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
-; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmovzxdq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovzxdq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
-; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmovzxdq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovzxdq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
-; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmovzxdq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
-; SKX-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
-; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovzxdq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
-; SKX-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmovzxdq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmovzxdq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50]
-; BDVER2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [2:0.50]
-; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmovzxdq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmovzxdq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
-; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
-; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmovzxdq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmovzxdq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [8:0.50]
-; ZNVER1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.25]
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
- %2 = zext <2 x i32> %1 to <2 x i64>
- %3 = load <2 x i32>, <2 x i32>* %a1, align 1
- %4 = zext <2 x i32> %3 to <2 x i64>
- %5 = add <2 x i64> %2, %4
- ret <2 x i64> %5
-}
-
-define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) {
-; GENERIC-LABEL: test_pmovzxwd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
-; GENERIC-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
-; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmovzxwd:
-; SLM: # %bb.0:
-; SLM-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [4:1.00]
-; SLM-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
-; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmovzxwd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
-; SANDY-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
-; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmovzxwd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
-; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
-; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmovzxwd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
-; HASWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
-; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmovzxwd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
-; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
-; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmovzxwd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovzxwd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
-; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
-; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmovzxwd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovzxwd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
-; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
-; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmovzxwd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
-; SKX-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
-; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovzxwd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
-; SKX-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmovzxwd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmovzxwd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
-; BDVER2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [2:0.50]
-; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmovzxwd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmovzxwd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
-; BTVER2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
-; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmovzxwd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmovzxwd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
-; ZNVER1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.25]
-; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %2 = zext <4 x i16> %1 to <4 x i32>
- %3 = load <4 x i16>, <4 x i16>* %a1, align 1
- %4 = zext <4 x i16> %3 to <4 x i32>
- %5 = add <4 x i32> %2, %4
- ret <4 x i32> %5
-}
-
-define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
-; GENERIC-LABEL: test_pmovzxwq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
-; GENERIC-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
-; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmovzxwq:
-; SLM: # %bb.0:
-; SLM-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [4:1.00]
-; SLM-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
-; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmovzxwq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
-; SANDY-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
-; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmovzxwq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
-; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
-; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmovzxwq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
-; HASWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
-; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmovzxwq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
-; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
-; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmovzxwq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
-; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmovzxwq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
-; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
-; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmovzxwq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmovzxwq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
-; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
-; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmovzxwq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
-; SKX-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
-; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmovzxwq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
-; SKX-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmovzxwq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [2:0.50]
-; BDVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
-; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmovzxwq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
-; BDVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [2:0.50]
-; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmovzxwq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
-; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
-; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmovzxwq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
-; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
-; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmovzxwq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50]
-; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmovzxwq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50]
-; ZNVER1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.25]
-; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
- %2 = zext <2 x i16> %1 to <2 x i64>
- %3 = load <2 x i16>, <2 x i16>* %a1, align 1
- %4 = zext <2 x i16> %3 to <2 x i64>
- %5 = add <2 x i64> %2, %4
- ret <2 x i64> %5
-}
-
-define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> *%a3) {
-; GENERIC-LABEL: test_pmuldq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00]
-; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmuldq:
-; SLM: # %bb.0:
-; SLM-NEXT: pmuldq (%rdi), %xmm2 # sched: [7:1.00]
-; SLM-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmuldq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00]
-; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmuldq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [11:1.00]
-; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmuldq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00]
-; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmuldq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [11:1.00]
-; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmuldq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmuldq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [10:1.00]
-; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmuldq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmuldq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [10:0.50]
-; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmuldq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [10:0.50]
-; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmuldq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [10:0.50]
-; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmuldq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmuldq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmuldq (%rdi), %xmm2, %xmm2 # sched: [9:1.00]
-; BDVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: vpor %xmm2, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmuldq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmuldq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmuldq (%rdi), %xmm2, %xmm2 # sched: [7:1.00]
-; BTVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vpor %xmm2, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmuldq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmuldq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmuldq (%rdi), %xmm2, %xmm2 # sched: [11:1.00]
-; ZNVER1-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpor %xmm2, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a3, align 16
- %3 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a2, <4 x i32> %2)
- %4 = or <2 x i64> %1, %3
- ret <2 x i64> %4
-}
-declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_pmulld:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmulld %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pmulld:
-; SLM: # %bb.0:
-; SLM-NEXT: pmulld %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT: pmulld (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmulld:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmulld:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmulld:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:2.00]
-; HASWELL-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:2.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmulld:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
-; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmulld:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:2.00]
-; BROADWELL-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [15:2.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmulld:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
-; BROADWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [15:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmulld:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:1.00]
-; SKYLAKE-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmulld:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00]
-; SKYLAKE-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmulld:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:1.00]
-; SKX-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmulld:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00]
-; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmulld:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [5:2.00]
-; BDVER2-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [10:2.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmulld:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; BDVER2-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmulld:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [4:2.00]
-; BTVER2-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [9:2.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmulld:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
-; BTVER2-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmulld:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmulld:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = mul <4 x i32> %a0, %a1
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = mul <4 x i32> %1, %2
- ret <4 x i32> %3
-}
-
-define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_ptest:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00]
-; GENERIC-NEXT: setb %al # sched: [1:0.50]
-; GENERIC-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
-; GENERIC-NEXT: setb %cl # sched: [1:0.50]
-; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33]
-; GENERIC-NEXT: movzbl %cl, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_ptest:
-; SLM: # %bb.0:
-; SLM-NEXT: ptest %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: setb %al # sched: [1:0.50]
-; SLM-NEXT: ptest (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: setb %cl # sched: [1:0.50]
-; SLM-NEXT: andb %al, %cl # sched: [1:0.50]
-; SLM-NEXT: movzbl %cl, %eax # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_ptest:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-SSE-NEXT: setb %al # sched: [1:0.50]
-; SANDY-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-SSE-NEXT: setb %cl # sched: [1:0.50]
-; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
-; SANDY-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_ptest:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
-; SANDY-NEXT: setb %al # sched: [1:0.50]
-; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00]
-; SANDY-NEXT: setb %cl # sched: [1:0.50]
-; SANDY-NEXT: andb %al, %cl # sched: [1:0.33]
-; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_ptest:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-SSE-NEXT: setb %al # sched: [1:0.50]
-; HASWELL-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-SSE-NEXT: setb %cl # sched: [1:0.50]
-; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; HASWELL-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_ptest:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
-; HASWELL-NEXT: setb %al # sched: [1:0.50]
-; HASWELL-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00]
-; HASWELL-NEXT: setb %cl # sched: [1:0.50]
-; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25]
-; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_ptest:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-SSE-NEXT: setb %al # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-SSE-NEXT: setb %cl # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ptest:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
-; BROADWELL-NEXT: setb %al # sched: [1:0.50]
-; BROADWELL-NEXT: vptest (%rdi), %xmm0 # sched: [7:1.00]
-; BROADWELL-NEXT: setb %cl # sched: [1:0.50]
-; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25]
-; BROADWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_ptest:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT: setb %al # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [9:1.00]
-; SKYLAKE-SSE-NEXT: setb %cl # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_ptest:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: setb %al # sched: [1:0.50]
-; SKYLAKE-NEXT: vptest (%rdi), %xmm0 # sched: [9:1.00]
-; SKYLAKE-NEXT: setb %cl # sched: [1:0.50]
-; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKYLAKE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_ptest:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00]
-; SKX-SSE-NEXT: setb %al # sched: [1:0.50]
-; SKX-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [9:1.00]
-; SKX-SSE-NEXT: setb %cl # sched: [1:0.50]
-; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKX-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_ptest:
-; SKX: # %bb.0:
-; SKX-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00]
-; SKX-NEXT: setb %al # sched: [1:0.50]
-; SKX-NEXT: vptest (%rdi), %xmm0 # sched: [9:1.00]
-; SKX-NEXT: setb %cl # sched: [1:0.50]
-; SKX-NEXT: andb %al, %cl # sched: [1:0.25]
-; SKX-NEXT: movzbl %cl, %eax # sched: [1:0.25]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_ptest:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-SSE-NEXT: setb %al # sched: [1:0.50]
-; BDVER2-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [6:1.00]
-; BDVER2-SSE-NEXT: setb %cl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
-; BDVER2-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_ptest:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vptest %xmm1, %xmm0 # sched: [1:1.00]
-; BDVER2-NEXT: setb %al # sched: [1:0.50]
-; BDVER2-NEXT: vptest (%rdi), %xmm0 # sched: [6:1.00]
-; BDVER2-NEXT: setb %cl # sched: [1:0.50]
-; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50]
-; BDVER2-NEXT: movzbl %cl, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_ptest:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: setb %al # sched: [1:0.50]
-; BTVER2-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: setb %cl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_ptest:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: setb %al # sched: [1:0.50]
-; BTVER2-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: setb %cl # sched: [1:0.50]
-; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50]
-; BTVER2-NEXT: movzbl %cl, %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_ptest:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-SSE-NEXT: setb %al # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-SSE-NEXT: setb %cl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_ptest:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vptest %xmm1, %xmm0 # sched: [1:1.00]
-; ZNVER1-NEXT: setb %al # sched: [1:0.25]
-; ZNVER1-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00]
-; ZNVER1-NEXT: setb %cl # sched: [1:0.25]
-; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25]
-; ZNVER1-NEXT: movzbl %cl, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
- %2 = load <2 x i64>, <2 x i64> *%a2, align 16
- %3 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %2)
- %4 = and i32 %1, %3
- ret i32 %4
-}
-declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
-
-define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_roundpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_roundpd:
-; SLM: # %bb.0:
-; SLM-NEXT: roundpd $7, (%rdi), %xmm1 # sched: [6:1.00]
-; SLM-NEXT: roundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_roundpd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_roundpd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00]
-; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_roundpd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [6:0.50]
-; HASWELL-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [12:2.00]
-; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_roundpd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50]
-; HASWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [12:2.00]
-; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_roundpd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: roundpd $7, (%rdi), %xmm1 # sched: [11:2.00]
-; BROADWELL-SSE-NEXT: roundpd $7, %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_roundpd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:2.00]
-; BROADWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_roundpd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:1.00]
-; SKYLAKE-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:1.00]
-; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_roundpd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00]
-; SKYLAKE-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:1.00]
-; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_roundpd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:1.00]
-; SKX-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:1.00]
-; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_roundpd:
-; SKX: # %bb.0:
-; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00]
-; SKX-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:1.00]
-; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_roundpd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_roundpd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00]
-; BDVER2-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_roundpd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_roundpd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_roundpd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_roundpd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:1.00]
-; ZNVER1-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7)
- %2 = load <2 x double>, <2 x double> *%a1, align 16
- %3 = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %2, i32 7)
- %4 = fadd <2 x double> %1, %3
- ret <2 x double> %4
-}
-declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone
-
-define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
-; GENERIC-LABEL: test_roundps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_roundps:
-; SLM: # %bb.0:
-; SLM-NEXT: roundps $7, (%rdi), %xmm1 # sched: [6:1.00]
-; SLM-NEXT: roundps $7, %xmm0, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_roundps:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_roundps:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00]
-; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_roundps:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [6:0.50]
-; HASWELL-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [12:2.00]
-; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_roundps:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:0.50]
-; HASWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [12:2.00]
-; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_roundps:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: roundps $7, (%rdi), %xmm1 # sched: [11:2.00]
-; BROADWELL-SSE-NEXT: roundps $7, %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_roundps:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:2.00]
-; BROADWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_roundps:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:1.00]
-; SKYLAKE-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_roundps:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:1.00]
-; SKYLAKE-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:1.00]
-; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_roundps:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:1.00]
-; SKX-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:1.00]
-; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_roundps:
-; SKX: # %bb.0:
-; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:1.00]
-; SKX-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:1.00]
-; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_roundps:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_roundps:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00]
-; BDVER2-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_roundps:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_roundps:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [8:1.00]
-; BTVER2-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_roundps:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_roundps:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:1.00]
-; ZNVER1-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7)
- %2 = load <4 x float>, <4 x float> *%a1, align 16
- %3 = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %2, i32 7)
- %4 = fadd <4 x float> %1, %3
- ret <4 x float> %4
-}
-declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
-
-define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
-; GENERIC-LABEL: test_roundsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
-; GENERIC-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_roundsd:
-; SLM: # %bb.0:
-; SLM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
-; SLM-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
-; SLM-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_roundsd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
-; SANDY-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
-; SANDY-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_roundsd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_roundsd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [6:0.50]
-; HASWELL-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [12:2.00]
-; HASWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_roundsd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50]
-; HASWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_roundsd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [11:2.00]
-; BROADWELL-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_roundsd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00]
-; BROADWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_roundsd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:1.00]
-; SKYLAKE-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:1.00]
-; SKYLAKE-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_roundsd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
-; SKYLAKE-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
-; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_roundsd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:1.00]
-; SKX-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:1.00]
-; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_roundsd:
-; SKX: # %bb.0:
-; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
-; SKX-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
-; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_roundsd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
-; BDVER2-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_roundsd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00]
-; BDVER2-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_roundsd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_roundsd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_roundsd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_roundsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00]
-; ZNVER1-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7)
- %2 = load <2 x double>, <2 x double>* %a2, align 16
- %3 = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %2, i32 7)
- %4 = fadd <2 x double> %1, %3
- ret <2 x double> %4
-}
-declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
-
-define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
-; GENERIC-LABEL: test_roundss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
-; GENERIC-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
-; GENERIC-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_roundss:
-; SLM: # %bb.0:
-; SLM-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50]
-; SLM-NEXT: roundss $7, (%rdi), %xmm0 # sched: [6:1.00]
-; SLM-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
-; SLM-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_roundss:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
-; SANDY-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
-; SANDY-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00]
-; SANDY-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_roundss:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_roundss:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [6:0.50]
-; HASWELL-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [12:2.00]
-; HASWELL-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_roundss:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50]
-; HASWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
-; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_roundss:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [11:2.00]
-; BROADWELL-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_roundss:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00]
-; BROADWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: vaddps %xmm2, %xmm0, %xmm0 # sched: [3:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_roundss:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:1.00]
-; SKYLAKE-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_roundss:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
-; SKYLAKE-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
-; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_roundss:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:1.00]
-; SKX-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:1.00]
-; SKX-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_roundss:
-; SKX: # %bb.0:
-; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
-; SKX-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
-; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_roundss:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50]
-; BDVER2-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: addps %xmm2, %xmm0 # sched: [5:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_roundss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00]
-; BDVER2-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_roundss:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [8:1.00]
-; BTVER2-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_roundss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
-; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_roundss:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_roundss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00]
-; ZNVER1-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; ZNVER1-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7)
- %2 = load <4 x float>, <4 x float> *%a2, align 16
- %3 = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %2, i32 7)
- %4 = fadd <4 x float> %1, %3
- ret <4 x float> %4
-}
-declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/sse42-schedule.ll b/llvm/test/CodeGen/X86/sse42-schedule.ll
deleted file mode 100644
index 97dffb4db09..00000000000
--- a/llvm/test/CodeGen/X86/sse42-schedule.ll
+++ /dev/null
@@ -1,1631 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.2,+pclmul | FileCheck %s --check-prefixes=CHECK,GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SKX-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2,-xop | FileCheck %s --check-prefixes=CHECK,BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
-
-define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) {
-; GENERIC-LABEL: crc32_32_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; GENERIC-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: crc32_32_8:
-; SLM: # %bb.0:
-; SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
-; SLM-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; SLM-NEXT: crc32b (%rdx), %eax # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: crc32_32_8:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; SANDY-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: crc32_32_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
-; SANDY-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; SANDY-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: crc32_32_8:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; HASWELL-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: crc32_32_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
-; HASWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; HASWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: crc32_32_8:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: crc32_32_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: crc32_32_8:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: crc32_32_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: crc32_32_8:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; SKX-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: crc32_32_8:
-; SKX: # %bb.0:
-; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKX-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: crc32_32_8:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:2.00]
-; BDVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [7:2.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: crc32_32_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.50]
-; BDVER2-NEXT: crc32b %sil, %eax # sched: [3:2.00]
-; BDVER2-NEXT: crc32b (%rdx), %eax # sched: [7:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: crc32_32_8:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:2.00]
-; BTVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [6:2.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: crc32_32_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50]
-; BTVER2-NEXT: crc32b %sil, %eax # sched: [3:2.00]
-; BTVER2-NEXT: crc32b (%rdx), %eax # sched: [6:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: crc32_32_8:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: crc32b (%rdx), %eax # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: crc32_32_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; ZNVER1-NEXT: crc32b (%rdx), %eax # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1)
- %2 = load i8, i8 *%a2
- %3 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %1, i8 %2)
- ret i32 %3
-}
-declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
-
-define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) {
-; GENERIC-LABEL: crc32_32_16:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: crc32w %si, %eax # sched: [3:1.00]
-; GENERIC-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: crc32_32_16:
-; SLM: # %bb.0:
-; SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
-; SLM-NEXT: crc32w %si, %eax # sched: [3:1.00]
-; SLM-NEXT: crc32w (%rdx), %eax # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: crc32_32_16:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00]
-; SANDY-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: crc32_32_16:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
-; SANDY-NEXT: crc32w %si, %eax # sched: [3:1.00]
-; SANDY-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: crc32_32_16:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00]
-; HASWELL-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: crc32_32_16:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
-; HASWELL-NEXT: crc32w %si, %eax # sched: [3:1.00]
-; HASWELL-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: crc32_32_16:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: crc32_32_16:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: crc32w %si, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: crc32_32_16:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: crc32_32_16:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: crc32w %si, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: crc32_32_16:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00]
-; SKX-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: crc32_32_16:
-; SKX: # %bb.0:
-; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKX-NEXT: crc32w %si, %eax # sched: [3:1.00]
-; SKX-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: crc32_32_16:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: crc32w %si, %eax # sched: [5:2.00]
-; BDVER2-SSE-NEXT: crc32w (%rdx), %eax # sched: [7:2.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: crc32_32_16:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.50]
-; BDVER2-NEXT: crc32w %si, %eax # sched: [5:2.00]
-; BDVER2-NEXT: crc32w (%rdx), %eax # sched: [7:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: crc32_32_16:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: crc32w %si, %eax # sched: [3:2.00]
-; BTVER2-SSE-NEXT: crc32w (%rdx), %eax # sched: [6:2.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: crc32_32_16:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50]
-; BTVER2-NEXT: crc32w %si, %eax # sched: [3:2.00]
-; BTVER2-NEXT: crc32w (%rdx), %eax # sched: [6:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: crc32_32_16:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: crc32w (%rdx), %eax # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: crc32_32_16:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: crc32w %si, %eax # sched: [3:1.00]
-; ZNVER1-NEXT: crc32w (%rdx), %eax # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1)
- %2 = load i16, i16 *%a2
- %3 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %1, i16 %2)
- ret i32 %3
-}
-declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
-
-define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) {
-; GENERIC-LABEL: crc32_32_32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
-; GENERIC-NEXT: crc32l %esi, %eax # sched: [3:1.00]
-; GENERIC-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: crc32_32_32:
-; SLM: # %bb.0:
-; SLM-NEXT: movl %edi, %eax # sched: [1:0.50]
-; SLM-NEXT: crc32l %esi, %eax # sched: [3:1.00]
-; SLM-NEXT: crc32l (%rdx), %eax # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: crc32_32_32:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00]
-; SANDY-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: crc32_32_32:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33]
-; SANDY-NEXT: crc32l %esi, %eax # sched: [3:1.00]
-; SANDY-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: crc32_32_32:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00]
-; HASWELL-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: crc32_32_32:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
-; HASWELL-NEXT: crc32l %esi, %eax # sched: [3:1.00]
-; HASWELL-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: crc32_32_32:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: crc32_32_32:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: crc32l %esi, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: crc32_32_32:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: crc32_32_32:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: crc32l %esi, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: crc32_32_32:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00]
-; SKX-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: crc32_32_32:
-; SKX: # %bb.0:
-; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
-; SKX-NEXT: crc32l %esi, %eax # sched: [3:1.00]
-; SKX-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: crc32_32_32:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: crc32l %esi, %eax # sched: [6:2.00]
-; BDVER2-SSE-NEXT: crc32l (%rdx), %eax # sched: [7:2.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: crc32_32_32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.50]
-; BDVER2-NEXT: crc32l %esi, %eax # sched: [6:2.00]
-; BDVER2-NEXT: crc32l (%rdx), %eax # sched: [7:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: crc32_32_32:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: crc32l %esi, %eax # sched: [3:2.00]
-; BTVER2-SSE-NEXT: crc32l (%rdx), %eax # sched: [6:2.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: crc32_32_32:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50]
-; BTVER2-NEXT: crc32l %esi, %eax # sched: [3:2.00]
-; BTVER2-NEXT: crc32l (%rdx), %eax # sched: [6:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: crc32_32_32:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movl %edi, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: crc32l (%rdx), %eax # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: crc32_32_32:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl %edi, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: crc32l %esi, %eax # sched: [3:1.00]
-; ZNVER1-NEXT: crc32l (%rdx), %eax # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
- %2 = load i32, i32 *%a2
- %3 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %1, i32 %2)
- ret i32 %3
-}
-declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
-
-define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind {
-; GENERIC-LABEL: crc32_64_8:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; GENERIC-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: crc32_64_8:
-; SLM: # %bb.0:
-; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; SLM-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; SLM-NEXT: crc32b (%rdx), %eax # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: crc32_64_8:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; SANDY-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; SANDY-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: crc32_64_8:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; SANDY-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; SANDY-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: crc32_64_8:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; HASWELL-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: crc32_64_8:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; HASWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; HASWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: crc32_64_8:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: crc32_64_8:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; BROADWELL-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: crc32_64_8:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: crc32_64_8:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; SKYLAKE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: crc32_64_8:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; SKX-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; SKX-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: crc32_64_8:
-; SKX: # %bb.0:
-; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; SKX-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: crc32_64_8:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:2.00]
-; BDVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [7:2.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: crc32_64_8:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BDVER2-NEXT: crc32b %sil, %eax # sched: [3:2.00]
-; BDVER2-NEXT: crc32b (%rdx), %eax # sched: [7:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: crc32_64_8:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:2.00]
-; BTVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [6:2.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: crc32_64_8:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: crc32b %sil, %eax # sched: [3:2.00]
-; BTVER2-NEXT: crc32b (%rdx), %eax # sched: [6:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: crc32_64_8:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: crc32b (%rdx), %eax # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: crc32_64_8:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: crc32b %sil, %eax # sched: [3:1.00]
-; ZNVER1-NEXT: crc32b (%rdx), %eax # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %a1)
- %2 = load i8, i8 *%a2
- %3 = call i64 @llvm.x86.sse42.crc32.64.8(i64 %1, i8 %2)
- ret i64 %3
-}
-declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
-
-define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) {
-; GENERIC-LABEL: crc32_64_64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; GENERIC-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
-; GENERIC-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: crc32_64_64:
-; SLM: # %bb.0:
-; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; SLM-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
-; SLM-NEXT: crc32q (%rdx), %rax # sched: [6:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: crc32_64_64:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; SANDY-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
-; SANDY-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: crc32_64_64:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33]
-; SANDY-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
-; SANDY-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: crc32_64_64:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
-; HASWELL-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: crc32_64_64:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; HASWELL-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
-; HASWELL-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: crc32_64_64:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: crc32_64_64:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; BROADWELL-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
-; BROADWELL-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: crc32_64_64:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: crc32_64_64:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; SKYLAKE-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
-; SKYLAKE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: crc32_64_64:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; SKX-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
-; SKX-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: crc32_64_64:
-; SKX: # %bb.0:
-; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; SKX-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
-; SKX-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: crc32_64_64:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: crc32q %rsi, %rax # sched: [10:2.00]
-; BDVER2-SSE-NEXT: crc32q (%rdx), %rax # sched: [7:2.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: crc32_64_64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BDVER2-NEXT: crc32q %rsi, %rax # sched: [10:2.00]
-; BDVER2-NEXT: crc32q (%rdx), %rax # sched: [7:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: crc32_64_64:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: crc32q %rsi, %rax # sched: [3:2.00]
-; BTVER2-SSE-NEXT: crc32q (%rdx), %rax # sched: [6:2.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: crc32_64_64:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
-; BTVER2-NEXT: crc32q %rsi, %rax # sched: [3:2.00]
-; BTVER2-NEXT: crc32q (%rdx), %rax # sched: [6:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: crc32_64_64:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: crc32q (%rdx), %rax # sched: [10:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: crc32_64_64:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movq %rdi, %rax # sched: [1:0.25]
-; ZNVER1-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
-; ZNVER1-NEXT: crc32q (%rdx), %rax # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
- %2 = load i64, i64 *%a2
- %3 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %1, i64 %2)
- ret i64 %3
-}
-declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
-
-define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pcmpestri:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33]
-; GENERIC-NEXT: movl $7, %edx # sched: [1:0.33]
-; GENERIC-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67]
-; GENERIC-NEXT: movl %ecx, %esi # sched: [1:0.33]
-; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33]
-; GENERIC-NEXT: movl $7, %edx # sched: [1:0.33]
-; GENERIC-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
-; GENERIC-NEXT: # kill: def $ecx killed $ecx def $rcx
-; GENERIC-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pcmpestri:
-; SLM: # %bb.0:
-; SLM-NEXT: movl $7, %eax # sched: [1:0.50]
-; SLM-NEXT: movl $7, %edx # sched: [1:0.50]
-; SLM-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [21:21.00]
-; SLM-NEXT: movl $7, %eax # sched: [1:0.50]
-; SLM-NEXT: movl $7, %edx # sched: [1:0.50]
-; SLM-NEXT: movl %ecx, %esi # sched: [1:0.50]
-; SLM-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [21:21.00]
-; SLM-NEXT: # kill: def $ecx killed $ecx def $rcx
-; SLM-NEXT: leal (%rcx,%rsi), %eax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpestri:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
-; SANDY-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67]
-; SANDY-SSE-NEXT: movl %ecx, %esi # sched: [1:0.33]
-; SANDY-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
-; SANDY-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
-; SANDY-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; SANDY-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpestri:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl $7, %eax # sched: [1:0.33]
-; SANDY-NEXT: movl $7, %edx # sched: [1:0.33]
-; SANDY-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67]
-; SANDY-NEXT: movl %ecx, %esi # sched: [1:0.33]
-; SANDY-NEXT: movl $7, %eax # sched: [1:0.33]
-; SANDY-NEXT: movl $7, %edx # sched: [1:0.33]
-; SANDY-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
-; SANDY-NEXT: # kill: def $ecx killed $ecx def $rcx
-; SANDY-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpestri:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; HASWELL-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
-; HASWELL-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25]
-; HASWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; HASWELL-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00]
-; HASWELL-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; HASWELL-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpestri:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
-; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
-; HASWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
-; HASWELL-NEXT: movl %ecx, %esi # sched: [1:0.25]
-; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
-; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
-; HASWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00]
-; HASWELL-NEXT: # kill: def $ecx killed $ecx def $rcx
-; HASWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpestri:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
-; BROADWELL-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [23:4.00]
-; BROADWELL-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; BROADWELL-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpestri:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25]
-; BROADWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
-; BROADWELL-NEXT: movl %ecx, %esi # sched: [1:0.25]
-; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25]
-; BROADWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [23:4.00]
-; BROADWELL-NEXT: # kill: def $ecx killed $ecx def $rcx
-; BROADWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpestri:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
-; SKYLAKE-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00]
-; SKYLAKE-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; SKYLAKE-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpestri:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: movl $7, %edx # sched: [1:0.25]
-; SKYLAKE-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
-; SKYLAKE-NEXT: movl %ecx, %esi # sched: [1:0.25]
-; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: movl $7, %edx # sched: [1:0.25]
-; SKYLAKE-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00]
-; SKYLAKE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; SKYLAKE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpestri:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; SKX-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
-; SKX-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25]
-; SKX-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; SKX-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00]
-; SKX-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; SKX-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpestri:
-; SKX: # %bb.0:
-; SKX-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKX-NEXT: movl $7, %edx # sched: [1:0.25]
-; SKX-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00]
-; SKX-NEXT: movl %ecx, %esi # sched: [1:0.25]
-; SKX-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKX-NEXT: movl $7, %edx # sched: [1:0.25]
-; SKX-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00]
-; SKX-NEXT: # kill: def $ecx killed $ecx def $rcx
-; SKX-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpestri:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50]
-; BDVER2-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [15:4.00]
-; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50]
-; BDVER2-SSE-NEXT: movl %ecx, %esi # sched: [1:0.50]
-; BDVER2-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [20:4.50]
-; BDVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; BDVER2-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpestri:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl $7, %eax # sched: [1:0.50]
-; BDVER2-NEXT: movl $7, %edx # sched: [1:0.50]
-; BDVER2-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [15:4.00]
-; BDVER2-NEXT: movl $7, %eax # sched: [1:0.50]
-; BDVER2-NEXT: movl $7, %edx # sched: [1:0.50]
-; BDVER2-NEXT: movl %ecx, %esi # sched: [1:0.50]
-; BDVER2-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [20:4.50]
-; BDVER2-NEXT: # kill: def $ecx killed $ecx def $rcx
-; BDVER2-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpestri:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [14:5.00]
-; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movl %ecx, %esi # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [19:5.00]
-; BTVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; BTVER2-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpestri:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl $7, %eax # sched: [1:0.50]
-; BTVER2-NEXT: movl $7, %edx # sched: [1:0.50]
-; BTVER2-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [14:5.00]
-; BTVER2-NEXT: movl $7, %eax # sched: [1:0.50]
-; BTVER2-NEXT: movl $7, %edx # sched: [1:0.50]
-; BTVER2-NEXT: movl %ecx, %esi # sched: [1:0.50]
-; BTVER2-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [19:5.00]
-; BTVER2-NEXT: # kill: def $ecx killed $ecx def $rcx
-; BTVER2-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpestri:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; ZNVER1-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpestri:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: movl $7, %edx # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: movl $7, %edx # sched: [1:0.25]
-; ZNVER1-NEXT: movl %ecx, %esi # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: # kill: def $ecx killed $ecx def $rcx
-; ZNVER1-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %2, i32 7, i8 7)
- %4 = add i32 %1, %3
- ret i32 %4
-}
-declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
-
-define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pcmpestrm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33]
-; GENERIC-NEXT: movl $7, %edx # sched: [1:0.33]
-; GENERIC-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67]
-; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33]
-; GENERIC-NEXT: movl $7, %edx # sched: [1:0.33]
-; GENERIC-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pcmpestrm:
-; SLM: # %bb.0:
-; SLM-NEXT: movl $7, %eax # sched: [1:0.50]
-; SLM-NEXT: movl $7, %edx # sched: [1:0.50]
-; SLM-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [17:17.00]
-; SLM-NEXT: movl $7, %eax # sched: [1:0.50]
-; SLM-NEXT: movl $7, %edx # sched: [1:0.50]
-; SLM-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [17:17.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpestrm:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
-; SANDY-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67]
-; SANDY-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
-; SANDY-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpestrm:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl $7, %eax # sched: [1:0.33]
-; SANDY-NEXT: movl $7, %edx # sched: [1:0.33]
-; SANDY-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67]
-; SANDY-NEXT: movl $7, %eax # sched: [1:0.33]
-; SANDY-NEXT: movl $7, %edx # sched: [1:0.33]
-; SANDY-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpestrm:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; HASWELL-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
-; HASWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; HASWELL-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpestrm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
-; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
-; HASWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
-; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25]
-; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25]
-; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpestrm:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
-; BROADWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [24:4.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpestrm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25]
-; BROADWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
-; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: movl $7, %edx # sched: [1:0.25]
-; BROADWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [24:4.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpestrm:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
-; SKYLAKE-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpestrm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: movl $7, %edx # sched: [1:0.25]
-; SKYLAKE-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
-; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: movl $7, %edx # sched: [1:0.25]
-; SKYLAKE-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpestrm:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; SKX-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
-; SKX-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; SKX-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpestrm:
-; SKX: # %bb.0:
-; SKX-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKX-NEXT: movl $7, %edx # sched: [1:0.25]
-; SKX-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00]
-; SKX-NEXT: movl $7, %eax # sched: [1:0.25]
-; SKX-NEXT: movl $7, %edx # sched: [1:0.25]
-; SKX-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpestrm:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50]
-; BDVER2-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [10:4.00]
-; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50]
-; BDVER2-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [15:4.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpestrm:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl $7, %eax # sched: [1:0.50]
-; BDVER2-NEXT: movl $7, %edx # sched: [1:0.50]
-; BDVER2-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [10:4.00]
-; BDVER2-NEXT: movl $7, %eax # sched: [1:0.50]
-; BDVER2-NEXT: movl $7, %edx # sched: [1:0.50]
-; BDVER2-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [15:4.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpestrm:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [14:5.00]
-; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [19:5.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpestrm:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl $7, %eax # sched: [1:0.50]
-; BTVER2-NEXT: movl $7, %edx # sched: [1:0.50]
-; BTVER2-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [14:5.00]
-; BTVER2-NEXT: movl $7, %eax # sched: [1:0.50]
-; BTVER2-NEXT: movl $7, %edx # sched: [1:0.50]
-; BTVER2-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:5.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpestrm:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: movl $7, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: movl $7, %edx # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpestrm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: movl $7, %edx # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: movl $7, %edx # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %1, i32 7, <16 x i8> %2, i32 7, i8 7)
- ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone
-
-define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pcmpistri:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
-; GENERIC-NEXT: movl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
-; GENERIC-NEXT: # kill: def $ecx killed $ecx def $rcx
-; GENERIC-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pcmpistri:
-; SLM: # %bb.0:
-; SLM-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [17:17.00]
-; SLM-NEXT: movl %ecx, %eax # sched: [1:0.50]
-; SLM-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:17.00]
-; SLM-NEXT: # kill: def $ecx killed $ecx def $rcx
-; SLM-NEXT: leal (%rcx,%rax), %eax # sched: [1:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpistri:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
-; SANDY-SSE-NEXT: movl %ecx, %eax # sched: [1:0.33]
-; SANDY-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
-; SANDY-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; SANDY-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpistri:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
-; SANDY-NEXT: movl %ecx, %eax # sched: [1:0.33]
-; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
-; SANDY-NEXT: # kill: def $ecx killed $ecx def $rcx
-; SANDY-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpistri:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
-; HASWELL-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25]
-; HASWELL-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
-; HASWELL-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; HASWELL-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpistri:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
-; HASWELL-NEXT: movl %ecx, %eax # sched: [1:0.25]
-; HASWELL-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
-; HASWELL-NEXT: # kill: def $ecx killed $ecx def $rcx
-; HASWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpistri:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
-; BROADWELL-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00]
-; BROADWELL-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; BROADWELL-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpistri:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
-; BROADWELL-NEXT: movl %ecx, %eax # sched: [1:0.25]
-; BROADWELL-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00]
-; BROADWELL-NEXT: # kill: def $ecx killed $ecx def $rcx
-; BROADWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpistri:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00]
-; SKYLAKE-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00]
-; SKYLAKE-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; SKYLAKE-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpistri:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00]
-; SKYLAKE-NEXT: movl %ecx, %eax # sched: [1:0.25]
-; SKYLAKE-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00]
-; SKYLAKE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; SKYLAKE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpistri:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00]
-; SKX-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25]
-; SKX-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00]
-; SKX-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; SKX-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpistri:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00]
-; SKX-NEXT: movl %ecx, %eax # sched: [1:0.25]
-; SKX-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00]
-; SKX-NEXT: # kill: def $ecx killed $ecx def $rcx
-; SKX-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpistri:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [14:1.00]
-; BDVER2-SSE-NEXT: movl %ecx, %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [19:1.00]
-; BDVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; BDVER2-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpistri:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [14:1.00]
-; BDVER2-NEXT: movl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [19:1.00]
-; BDVER2-NEXT: # kill: def $ecx killed $ecx def $rcx
-; BDVER2-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpistri:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [7:2.00]
-; BTVER2-SSE-NEXT: movl %ecx, %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [12:2.00]
-; BTVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; BTVER2-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpistri:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [7:2.00]
-; BTVER2-NEXT: movl %ecx, %eax # sched: [1:0.50]
-; BTVER2-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [12:2.00]
-; BTVER2-NEXT: # kill: def $ecx killed $ecx def $rcx
-; BTVER2-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpistri:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
-; ZNVER1-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpistri:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: movl %ecx, %eax # sched: [1:0.25]
-; ZNVER1-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: # kill: def $ecx killed $ecx def $rcx
-; ZNVER1-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %2, i8 7)
- %4 = add i32 %1, %3
- ret i32 %4
-}
-declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone
-
-define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pcmpistrm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
-; GENERIC-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pcmpistrm:
-; SLM: # %bb.0:
-; SLM-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [13:13.00]
-; SLM-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [13:13.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpistrm:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
-; SANDY-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpistrm:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
-; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpistrm:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
-; HASWELL-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpistrm:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
-; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpistrm:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
-; BROADWELL-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpistrm:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
-; BROADWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpistrm:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
-; SKYLAKE-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpistrm:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
-; SKYLAKE-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpistrm:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
-; SKX-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpistrm:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00]
-; SKX-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpistrm:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [6:1.00]
-; BDVER2-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [11:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpistrm:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [6:1.00]
-; BDVER2-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpistrm:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [8:2.00]
-; BTVER2-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [13:2.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpistrm:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [8:2.00]
-; BTVER2-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [13:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpistrm:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpistrm:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %1, <16 x i8> %2, i8 7)
- ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone
-
-define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_pcmpgtq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pcmpgtq:
-; SLM: # %bb.0:
-; SLM-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pcmpgtq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pcmpgtq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pcmpgtq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pcmpgtq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pcmpgtq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pcmpgtq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pcmpgtq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [3:1.00]
-; SKYLAKE-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [9:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pcmpgtq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SKYLAKE-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pcmpgtq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [3:1.00]
-; SKX-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [9:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pcmpgtq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pcmpgtq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pcmpgtq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pcmpgtq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pcmpgtq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pcmpgtq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [1:0.50]
-; ZNVER1-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pcmpgtq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; ZNVER1-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = icmp sgt <2 x i64> %a0, %a1
- %2 = sext <2 x i1> %1 to <2 x i64>
- %3 = load <2 x i64>, <2 x i64>*%a2, align 16
- %4 = icmp sgt <2 x i64> %2, %3
- %5 = sext <2 x i1> %4 to <2 x i64>
- ret <2 x i64> %5
-}
-
-define <2 x i64> @test_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_pclmulqdq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [14:6.00]
-; GENERIC-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [14:5.67]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; SLM-LABEL: test_pclmulqdq:
-; SLM: # %bb.0:
-; SLM-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [10:10.00]
-; SLM-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [10:10.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pclmulqdq:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [14:6.00]
-; SANDY-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [14:5.67]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pclmulqdq:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [14:6.00]
-; SANDY-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [14:5.67]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pclmulqdq:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [11:2.00]
-; HASWELL-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [17:2.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pclmulqdq:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [11:2.00]
-; HASWELL-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [17:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pclmulqdq:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pclmulqdq:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pclmulqdq:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [12:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pclmulqdq:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
-; SKYLAKE-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pclmulqdq:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [6:1.00]
-; SKX-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [12:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pclmulqdq:
-; SKX: # %bb.0:
-; SKX-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [6:1.00]
-; SKX-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pclmulqdq:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [12:1.00]
-; BDVER2-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [17:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pclmulqdq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [13:1.00]
-; BDVER2-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [17:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pclmulqdq:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pclmulqdq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pclmulqdq:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pclmulqdq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = load <2 x i64>, <2 x i64> *%a2, align 16
- %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
- %3 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %1, <2 x i64> %2, i8 0)
- ret <2 x i64> %3
-}
-declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8)
diff --git a/llvm/test/CodeGen/X86/sse4a-schedule.ll b/llvm/test/CodeGen/X86/sse4a-schedule.ll
deleted file mode 100644
index ad76845a73d..00000000000
--- a/llvm/test/CodeGen/X86/sse4a-schedule.ll
+++ /dev/null
@@ -1,156 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4a | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=ZNVER1
-
-define <2 x i64> @test_extrq(<2 x i64> %a0, <16 x i8> %a1) {
-; GENERIC-LABEL: test_extrq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_extrq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: extrq %xmm1, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_extrq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_extrq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: extrq %xmm1, %xmm0 # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %a0, <16 x i8> %a1)
- ret <2 x i64> %1
-}
-declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>)
-
-define <2 x i64> @test_extrqi(<2 x i64> %a0) {
-; GENERIC-LABEL: test_extrqi:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_extrqi:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: extrq $2, $3, %xmm0 # sched: [3:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_extrqi:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_extrqi:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: extrq $2, $3, %xmm0 # sched: [2:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %a0, i8 3, i8 2)
- ret <2 x i64> %1
-}
-declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8)
-
-define <2 x i64> @test_insertq(<2 x i64> %a0, <2 x i64> %a1) {
-; GENERIC-LABEL: test_insertq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: insertq %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_insertq:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: insertq %xmm1, %xmm0 # sched: [3:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_insertq:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: insertq %xmm1, %xmm0 # sched: [2:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_insertq:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: insertq %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %a0, <2 x i64> %a1)
- ret <2 x i64> %1
-}
-declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>)
-
-define <2 x i64> @test_insertqi(<2 x i64> %a0, <2 x i64> %a1) {
-; GENERIC-LABEL: test_insertqi:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_insertqi:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [3:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_insertqi:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [2:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_insertqi:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %a0, <2 x i64> %a1, i8 5, i8 6)
- ret <2 x i64> %1
-}
-declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8)
-
-define void @test_movntsd(i8* %p, <2 x double> %a) {
-; GENERIC-LABEL: test_movntsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movntsd %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_movntsd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movntsd %xmm0, (%rdi) # sched: [3:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movntsd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movntsd %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movntsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movntsd %xmm0, (%rdi) # sched: [8:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void @llvm.x86.sse4a.movnt.sd(i8* %p, <2 x double> %a)
- ret void
-}
-declare void @llvm.x86.sse4a.movnt.sd(i8*, <2 x double>)
-
-define void @test_movntss(i8* %p, <4 x float> %a) {
-; GENERIC-LABEL: test_movntss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movntss %xmm0, (%rdi) # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_movntss:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movntss %xmm0, (%rdi) # sched: [3:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_movntss:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movntss %xmm0, (%rdi) # sched: [3:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_movntss:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movntss %xmm0, (%rdi) # sched: [8:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- tail call void @llvm.x86.sse4a.movnt.ss(i8* %p, <4 x float> %a)
- ret void
-}
-declare void @llvm.x86.sse4a.movnt.ss(i8*, <4 x float>)
-
diff --git a/llvm/test/CodeGen/X86/ssse3-schedule.ll b/llvm/test/CodeGen/X86/ssse3-schedule.ll
deleted file mode 100644
index 5a871e27360..00000000000
--- a/llvm/test/CodeGen/X86/ssse3-schedule.ll
+++ /dev/null
@@ -1,2049 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefixes=CHECK,ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
-
-define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) {
-; GENERIC-LABEL: test_pabsb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pabsb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: pabsb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pabsb:
-; SLM: # %bb.0:
-; SLM-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: pabsb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pabsb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pabsb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50]
-; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pabsb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pabsb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50]
-; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pabsb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pabsb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [6:0.50]
-; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pabsb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pabsb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pabsb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pabsb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50]
-; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pabsb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pabsb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50]
-; BDVER2-NEXT: vpabsb %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pabsb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pabsb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpabsb (%rdi), %xmm1 # sched: [6:1.00]
-; BTVER2-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pabsb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pabsb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpabsb (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0)
- %2 = load <16 x i8>, <16 x i8> *%a1, align 16
- %3 = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %2)
- %4 = or <16 x i8> %1, %3
- ret <16 x i8> %4
-}
-declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone
-
-define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) {
-; GENERIC-LABEL: test_pabsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pabsd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: pabsd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pabsd:
-; SLM: # %bb.0:
-; SLM-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: pabsd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pabsd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pabsd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50]
-; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pabsd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pabsd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50]
-; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pabsd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pabsd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [6:0.50]
-; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pabsd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pabsd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pabsd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pabsd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50]
-; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pabsd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pabsd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50]
-; BDVER2-NEXT: vpabsd %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pabsd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pabsd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpabsd (%rdi), %xmm1 # sched: [6:1.00]
-; BTVER2-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pabsd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pabsd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpabsd (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0)
- %2 = load <4 x i32>, <4 x i32> *%a1, align 16
- %3 = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %2)
- %4 = or <4 x i32> %1, %3
- ret <4 x i32> %4
-}
-declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone
-
-define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) {
-; GENERIC-LABEL: test_pabsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50]
-; GENERIC-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pabsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50]
-; ATOM-NEXT: pabsw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pabsw:
-; SLM: # %bb.0:
-; SLM-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50]
-; SLM-NEXT: pabsw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pabsw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pabsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50]
-; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pabsw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pabsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50]
-; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pabsw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pabsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpabsw (%rdi), %xmm1 # sched: [6:0.50]
-; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pabsw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pabsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50]
-; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pabsw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50]
-; SKX-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pabsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50]
-; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pabsw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pabsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50]
-; BDVER2-NEXT: vpabsw %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pabsw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pabsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpabsw (%rdi), %xmm1 # sched: [6:1.00]
-; BTVER2-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pabsw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pabsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpabsw (%rdi), %xmm1 # sched: [8:0.50]
-; ZNVER1-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0)
- %2 = load <8 x i16>, <8 x i16> *%a1, align 16
- %3 = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %2)
- %4 = or <8 x i16> %1, %3
- ret <8 x i16> %4
-}
-declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone
-
-define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_palignr:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
-; GENERIC-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
-; GENERIC-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_palignr:
-; ATOM: # %bb.0:
-; ATOM-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
-; ATOM-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00]
-; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_palignr:
-; SLM: # %bb.0:
-; SLM-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
-; SLM-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [4:1.00]
-; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_palignr:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
-; SANDY-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
-; SANDY-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_palignr:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
-; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_palignr:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
-; HASWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
-; HASWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_palignr:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
-; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_palignr:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_palignr:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
-; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_palignr:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_palignr:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
-; SKYLAKE-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_palignr:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
-; SKX-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
-; SKX-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_palignr:
-; SKX: # %bb.0:
-; SKX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00]
-; SKX-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_palignr:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [2:0.50]
-; BDVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
-; BDVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_palignr:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [2:0.50]
-; BDVER2-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_palignr:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
-; BTVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00]
-; BTVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_palignr:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
-; BTVER2-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_palignr:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.25]
-; ZNVER1-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_palignr:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.25]
-; ZNVER1-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = shufflevector <8 x i16> %2, <8 x i16> %1, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
- ret <8 x i16> %3
-}
-
-define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_phaddd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: phaddd (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_phaddd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50]
-; ATOM-NEXT: phaddd (%rdi), %xmm0 # sched: [4:2.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_phaddd:
-; SLM: # %bb.0:
-; SLM-NEXT: phaddd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: phaddd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_phaddd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50]
-; SANDY-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:1.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_phaddd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
-; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_phaddd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00]
-; HASWELL-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:2.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_phaddd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_phaddd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00]
-; BROADWELL-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [8:2.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phaddd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_phaddd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00]
-; SKYLAKE-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:2.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phaddd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_phaddd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00]
-; SKX-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:2.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phaddd:
-; SKX: # %bb.0:
-; SKX-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; SKX-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_phaddd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [10:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_phaddd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_phaddd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_phaddd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_phaddd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_phaddd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %1, <4 x i32> %2)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_phaddsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_phaddsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: phaddsw %xmm1, %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: phaddsw (%rdi), %xmm0 # sched: [8:4.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_phaddsw:
-; SLM: # %bb.0:
-; SLM-NEXT: phaddsw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: phaddsw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_phaddsw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:1.50]
-; SANDY-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:1.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_phaddsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
-; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_phaddsw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00]
-; HASWELL-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:2.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_phaddsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_phaddsw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00]
-; BROADWELL-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [8:2.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phaddsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_phaddsw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00]
-; SKYLAKE-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:2.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phaddsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_phaddsw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00]
-; SKX-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:2.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phaddsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; SKX-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_phaddsw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [10:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_phaddsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_phaddsw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_phaddsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_phaddsw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_phaddsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %1, <8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_phaddw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: phaddw %xmm1, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: phaddw (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_phaddw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: phaddw %xmm1, %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: phaddw (%rdi), %xmm0 # sched: [8:4.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_phaddw:
-; SLM: # %bb.0:
-; SLM-NEXT: phaddw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: phaddw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_phaddw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:1.50]
-; SANDY-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:1.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_phaddw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
-; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_phaddw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:2.00]
-; HASWELL-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:2.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_phaddw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_phaddw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:2.00]
-; BROADWELL-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [8:2.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phaddw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_phaddw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:2.00]
-; SKYLAKE-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:2.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phaddw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_phaddw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:2.00]
-; SKX-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:2.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phaddw:
-; SKX: # %bb.0:
-; SKX-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; SKX-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_phaddw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [10:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_phaddw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_phaddw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_phaddw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_phaddw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_phaddw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %1, <8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_phsubd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: phsubd %xmm1, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: phsubd (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_phsubd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: phsubd %xmm1, %xmm0 # sched: [3:1.50]
-; ATOM-NEXT: phsubd (%rdi), %xmm0 # sched: [4:2.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_phsubd:
-; SLM: # %bb.0:
-; SLM-NEXT: phsubd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: phsubd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_phsubd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:1.50]
-; SANDY-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:1.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_phsubd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
-; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_phsubd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:2.00]
-; HASWELL-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:2.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_phsubd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_phsubd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:2.00]
-; BROADWELL-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [8:2.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phsubd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_phsubd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:2.00]
-; SKYLAKE-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:2.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phsubd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_phsubd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:2.00]
-; SKX-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:2.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phsubd:
-; SKX: # %bb.0:
-; SKX-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; SKX-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_phsubd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [10:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_phsubd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_phsubd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_phsubd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_phsubd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_phsubd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %1, <4 x i32> %2)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_phsubsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_phsubsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: phsubsw %xmm1, %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: phsubsw (%rdi), %xmm0 # sched: [8:4.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_phsubsw:
-; SLM: # %bb.0:
-; SLM-NEXT: phsubsw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: phsubsw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_phsubsw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:1.50]
-; SANDY-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:1.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_phsubsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
-; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_phsubsw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:2.00]
-; HASWELL-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:2.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_phsubsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_phsubsw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:2.00]
-; BROADWELL-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [8:2.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phsubsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_phsubsw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:2.00]
-; SKYLAKE-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:2.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phsubsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_phsubsw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:2.00]
-; SKX-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:2.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phsubsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; SKX-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_phsubsw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [10:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_phsubsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_phsubsw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_phsubsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_phsubsw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_phsubsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %1, <8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_phsubw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: phsubw %xmm1, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: phsubw (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_phsubw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: phsubw %xmm1, %xmm0 # sched: [7:3.50]
-; ATOM-NEXT: phsubw (%rdi), %xmm0 # sched: [8:4.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_phsubw:
-; SLM: # %bb.0:
-; SLM-NEXT: phsubw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: phsubw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_phsubw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:1.50]
-; SANDY-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:1.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_phsubw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
-; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_phsubw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:2.00]
-; HASWELL-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:2.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_phsubw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_phsubw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:2.00]
-; BROADWELL-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [8:2.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_phsubw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BROADWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_phsubw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:2.00]
-; SKYLAKE-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:2.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_phsubw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; SKYLAKE-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_phsubw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:2.00]
-; SKX-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:2.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_phsubw:
-; SKX: # %bb.0:
-; SKX-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; SKX-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_phsubw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [5:0.50]
-; BDVER2-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [10:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_phsubw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
-; BDVER2-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_phsubw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [2:0.50]
-; BTVER2-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_phsubw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BTVER2-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_phsubw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [100:0.25]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_phsubw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [100:0.25]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %1, <8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pmaddubsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmaddubsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmaddubsw:
-; SLM: # %bb.0:
-; SLM-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmaddubsw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmaddubsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmaddubsw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmaddubsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmaddubsw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmaddubsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmaddubsw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmaddubsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmaddubsw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmaddubsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmaddubsw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmaddubsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmaddubsw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmaddubsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmaddubsw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmaddubsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1)
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = bitcast <8 x i16> %1 to <16 x i8>
- %4 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %3, <16 x i8> %2)
- ret <8 x i16> %4
-}
-declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_pmulhrsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pmulhrsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [5:5.00]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pmulhrsw:
-; SLM: # %bb.0:
-; SLM-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:1.00]
-; SLM-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [7:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pmulhrsw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
-; SANDY-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pmulhrsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; SANDY-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pmulhrsw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
-; HASWELL-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pmulhrsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; HASWELL-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pmulhrsw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
-; BROADWELL-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [10:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pmulhrsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; BROADWELL-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pmulhrsw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [10:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pmulhrsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKYLAKE-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pmulhrsw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:0.50]
-; SKX-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [10:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pmulhrsw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
-; SKX-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pmulhrsw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:1.00]
-; BDVER2-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [9:1.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pmulhrsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER2-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pmulhrsw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [2:1.00]
-; BTVER2-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [7:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pmulhrsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
-; BTVER2-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pmulhrsw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:1.00]
-; ZNVER1-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pmulhrsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; ZNVER1-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %1, <8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone
-
-define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_pshufb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: pshufb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: pshufb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_pshufb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: pshufb %xmm1, %xmm0 # sched: [4:2.00]
-; ATOM-NEXT: pshufb (%rdi), %xmm0 # sched: [5:2.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_pshufb:
-; SLM: # %bb.0:
-; SLM-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00]
-; SLM-NEXT: pshufb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_pshufb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_pshufb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_pshufb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00]
-; HASWELL-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:1.00]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_pshufb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_pshufb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00]
-; BROADWELL-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [6:1.00]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_pshufb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; BROADWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_pshufb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:1.00]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_pshufb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKYLAKE-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_pshufb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00]
-; SKX-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:1.00]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_pshufb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; SKX-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_pshufb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [3:2.00]
-; BDVER2-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [8:2.00]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_pshufb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BDVER2-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_pshufb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [2:2.00]
-; BTVER2-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:2.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_pshufb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
-; BTVER2-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_pshufb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_pshufb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %1, <16 x i8> %2)
- ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
-; GENERIC-LABEL: test_psignb:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psignb:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: psignb (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psignb:
-; SLM: # %bb.0:
-; SLM-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: psignb (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psignb:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psignb:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psignb:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psignb:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psignb:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psignb:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psignb:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psignb:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psignb:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psignb:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psignb:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psignb:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psignb:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psignb:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psignb:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psignb:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1)
- %2 = load <16 x i8>, <16 x i8> *%a2, align 16
- %3 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %1, <16 x i8> %2)
- ret <16 x i8> %3
-}
-declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone
-
-define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
-; GENERIC-LABEL: test_psignd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psignd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: psignd (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psignd:
-; SLM: # %bb.0:
-; SLM-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: psignd (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psignd:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psignd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psignd:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psignd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psignd:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psignd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psignd:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psignd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psignd:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psignd:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psignd:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psignd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psignd:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psignd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psignd:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psignd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1)
- %2 = load <4 x i32>, <4 x i32> *%a2, align 16
- %3 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %1, <4 x i32> %2)
- ret <4 x i32> %3
-}
-declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone
-
-define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
-; GENERIC-LABEL: test_psignw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; ATOM-LABEL: test_psignw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50]
-; ATOM-NEXT: psignw (%rdi), %xmm0 # sched: [1:1.00]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: nop # sched: [1:0.50]
-; ATOM-NEXT: retq # sched: [79:39.50]
-;
-; SLM-LABEL: test_psignw:
-; SLM: # %bb.0:
-; SLM-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50]
-; SLM-NEXT: psignw (%rdi), %xmm0 # sched: [4:1.00]
-; SLM-NEXT: retq # sched: [4:1.00]
-;
-; SANDY-SSE-LABEL: test_psignw:
-; SANDY-SSE: # %bb.0:
-; SANDY-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50]
-; SANDY-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50]
-; SANDY-SSE-NEXT: retq # sched: [1:1.00]
-;
-; SANDY-LABEL: test_psignw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SANDY-NEXT: retq # sched: [1:1.00]
-;
-; HASWELL-SSE-LABEL: test_psignw:
-; HASWELL-SSE: # %bb.0:
-; HASWELL-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50]
-; HASWELL-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50]
-; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; HASWELL-LABEL: test_psignw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; HASWELL-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-SSE-LABEL: test_psignw:
-; BROADWELL-SSE: # %bb.0:
-; BROADWELL-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50]
-; BROADWELL-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [6:0.50]
-; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_psignw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BROADWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
-; BROADWELL-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-SSE-LABEL: test_psignw:
-; SKYLAKE-SSE: # %bb.0:
-; SKYLAKE-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50]
-; SKYLAKE-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50]
-; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKYLAKE-LABEL: test_psignw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKYLAKE-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKYLAKE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-SSE-LABEL: test_psignw:
-; SKX-SSE: # %bb.0:
-; SKX-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50]
-; SKX-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50]
-; SKX-SSE-NEXT: retq # sched: [7:1.00]
-;
-; SKX-LABEL: test_psignw:
-; SKX: # %bb.0:
-; SKX-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; SKX-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; SKX-NEXT: retq # sched: [7:1.00]
-;
-; BDVER2-SSE-LABEL: test_psignw:
-; BDVER2-SSE: # %bb.0:
-; BDVER2-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [2:0.50]
-; BDVER2-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER2-SSE-NEXT: retq # sched: [5:1.00]
-;
-; BDVER2-LABEL: test_psignw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER2-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BTVER2-SSE-LABEL: test_psignw:
-; BTVER2-SSE: # %bb.0:
-; BTVER2-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50]
-; BTVER2-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [6:1.00]
-; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
-;
-; BTVER2-LABEL: test_psignw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; BTVER2-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
-; BTVER2-NEXT: retq # sched: [4:1.00]
-;
-; ZNVER1-SSE-LABEL: test_psignw:
-; ZNVER1-SSE: # %bb.0:
-; ZNVER1-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.25]
-; ZNVER1-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
-;
-; ZNVER1-LABEL: test_psignw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
-; ZNVER1-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [1:0.50]
- %1 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1)
- %2 = load <8 x i16>, <8 x i16> *%a2, align 16
- %3 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %1, <8 x i16> %2)
- ret <8 x i16> %3
-}
-declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/tbm-schedule.ll b/llvm/test/CodeGen/X86/tbm-schedule.ll
deleted file mode 100644
index b8f9bb08f3e..00000000000
--- a/llvm/test/CodeGen/X86/tbm-schedule.ll
+++ /dev/null
@@ -1,773 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+tbm | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4
-
-define i32 @test_x86_tbm_bextri_u32(i32 %a0, i32* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_bextri_u32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
-; GENERIC-NEXT: # sched: [2:1.00]
-; GENERIC-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_bextri_u32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
-; BDVER2-NEXT: # sched: [6:0.50]
-; BDVER2-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
-; BDVER2-NEXT: # sched: [2:0.50]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_bextri_u32:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
-; BDVER3-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
-; BDVER3-NEXT: addl %ecx, %eax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_bextri_u32:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
-; BDVER4-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
-; BDVER4-NEXT: addl %ecx, %eax
-; BDVER4-NEXT: retq
- %a1 = load i32, i32* %p1
- %r0 = lshr i32 %a0, 4
- %m0 = lshr i32 %a1, 4
- %r1 = and i32 %r0, 4095
- %m1 = and i32 %m0, 4095
- %res = add i32 %r1, %m1
- ret i32 %res
-}
-
-define i64 @test_x86_tbm_bextri_u64(i64 %a0, i64* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_bextri_u64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
-; GENERIC-NEXT: # sched: [2:1.00]
-; GENERIC-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
-; GENERIC-NEXT: # sched: [7:1.00]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_bextri_u64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
-; BDVER2-NEXT: # sched: [6:0.50]
-; BDVER2-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
-; BDVER2-NEXT: # sched: [2:0.50]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_bextri_u64:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
-; BDVER3-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
-; BDVER3-NEXT: addq %rcx, %rax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_bextri_u64:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
-; BDVER4-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
-; BDVER4-NEXT: addq %rcx, %rax
-; BDVER4-NEXT: retq
- %a1 = load i64, i64* %p1
- %r0 = lshr i64 %a0, 4
- %m0 = lshr i64 %a1, 4
- %r1 = and i64 %r0, 4095
- %m1 = and i64 %m0, 4095
- %res = add i64 %r1, %m1
- ret i64 %res
-}
-
-define i32 @test_x86_tbm_blcfill_u32(i32 %a0, i32* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_blcfill_u32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blcfilll %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: blcfilll (%rsi), %eax # sched: [6:0.50]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_blcfill_u32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blcfilll (%rsi), %eax # sched: [6:0.50]
-; BDVER2-NEXT: blcfilll %edi, %ecx # sched: [2:0.50]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_blcfill_u32:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: blcfilll %edi, %ecx
-; BDVER3-NEXT: blcfilll (%rsi), %eax
-; BDVER3-NEXT: addl %ecx, %eax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_blcfill_u32:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: blcfilll %edi, %ecx
-; BDVER4-NEXT: blcfilll (%rsi), %eax
-; BDVER4-NEXT: addl %ecx, %eax
-; BDVER4-NEXT: retq
- %a1 = load i32, i32* %p1
- %r0 = add i32 %a0, 1
- %m0 = add i32 %a1, 1
- %r1 = and i32 %r0, %a0
- %m1 = and i32 %m0, %a1
- %res = add i32 %r1, %m1
- ret i32 %res
-}
-
-define i64 @test_x86_tbm_blcfill_u64(i64 %a0, i64* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_blcfill_u64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blcfillq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: blcfillq (%rsi), %rax # sched: [6:0.50]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_blcfill_u64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blcfillq (%rsi), %rax # sched: [6:0.50]
-; BDVER2-NEXT: blcfillq %rdi, %rcx # sched: [2:0.50]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_blcfill_u64:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: blcfillq %rdi, %rcx
-; BDVER3-NEXT: blcfillq (%rsi), %rax
-; BDVER3-NEXT: addq %rcx, %rax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_blcfill_u64:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: blcfillq %rdi, %rcx
-; BDVER4-NEXT: blcfillq (%rsi), %rax
-; BDVER4-NEXT: addq %rcx, %rax
-; BDVER4-NEXT: retq
- %a1 = load i64, i64* %p1
- %r0 = add i64 %a0, 1
- %m0 = add i64 %a1, 1
- %r1 = and i64 %r0, %a0
- %m1 = and i64 %m0, %a1
- %res = add i64 %r1, %m1
- ret i64 %res
-}
-
-define i32 @test_x86_tbm_blci_u32(i32 %a0, i32* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_blci_u32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blcil %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: blcil (%rsi), %eax # sched: [6:0.50]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_blci_u32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blcil (%rsi), %eax # sched: [6:0.50]
-; BDVER2-NEXT: blcil %edi, %ecx # sched: [2:0.50]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_blci_u32:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: blcil %edi, %ecx
-; BDVER3-NEXT: blcil (%rsi), %eax
-; BDVER3-NEXT: addl %ecx, %eax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_blci_u32:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: blcil %edi, %ecx
-; BDVER4-NEXT: blcil (%rsi), %eax
-; BDVER4-NEXT: addl %ecx, %eax
-; BDVER4-NEXT: retq
- %a1 = load i32, i32* %p1
- %r0 = add i32 1, %a0
- %m0 = add i32 1, %a1
- %r1 = xor i32 %r0, -1
- %m1 = xor i32 %m0, -1
- %r2 = or i32 %r1, %a0
- %m2 = or i32 %m1, %a1
- %res = add i32 %r2, %m2
- ret i32 %res
-}
-
-define i64 @test_x86_tbm_blci_u64(i64 %a0, i64* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_blci_u64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blciq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: blciq (%rsi), %rax # sched: [6:0.50]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_blci_u64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blciq (%rsi), %rax # sched: [6:0.50]
-; BDVER2-NEXT: blciq %rdi, %rcx # sched: [2:0.50]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_blci_u64:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: blciq %rdi, %rcx
-; BDVER3-NEXT: blciq (%rsi), %rax
-; BDVER3-NEXT: addq %rcx, %rax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_blci_u64:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: blciq %rdi, %rcx
-; BDVER4-NEXT: blciq (%rsi), %rax
-; BDVER4-NEXT: addq %rcx, %rax
-; BDVER4-NEXT: retq
- %a1 = load i64, i64* %p1
- %r0 = add i64 1, %a0
- %m0 = add i64 1, %a1
- %r1 = xor i64 %r0, -1
- %m1 = xor i64 %m0, -1
- %r2 = or i64 %r1, %a0
- %m2 = or i64 %m1, %a1
- %res = add i64 %r2, %m2
- ret i64 %res
-}
-
-define i32 @test_x86_tbm_blcic_u32(i32 %a0, i32* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_blcic_u32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blcicl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: blcicl (%rsi), %eax # sched: [6:0.50]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_blcic_u32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blcicl (%rsi), %eax # sched: [6:0.50]
-; BDVER2-NEXT: blcicl %edi, %ecx # sched: [2:0.50]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_blcic_u32:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: blcicl %edi, %ecx
-; BDVER3-NEXT: blcicl (%rsi), %eax
-; BDVER3-NEXT: addl %ecx, %eax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_blcic_u32:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: blcicl %edi, %ecx
-; BDVER4-NEXT: blcicl (%rsi), %eax
-; BDVER4-NEXT: addl %ecx, %eax
-; BDVER4-NEXT: retq
- %a1 = load i32, i32* %p1
- %r0 = xor i32 %a0, -1
- %m0 = xor i32 %a1, -1
- %r1 = add i32 %a0, 1
- %m1 = add i32 %a1, 1
- %r2 = and i32 %r1, %r0
- %m2 = and i32 %m1, %m0
- %res = add i32 %r2, %m2
- ret i32 %res
-}
-
-define i64 @test_x86_tbm_blcic_u64(i64 %a0, i64* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_blcic_u64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blcicq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: blcicq (%rsi), %rax # sched: [6:0.50]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_blcic_u64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blcicq (%rsi), %rax # sched: [6:0.50]
-; BDVER2-NEXT: blcicq %rdi, %rcx # sched: [2:0.50]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_blcic_u64:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: blcicq %rdi, %rcx
-; BDVER3-NEXT: blcicq (%rsi), %rax
-; BDVER3-NEXT: addq %rcx, %rax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_blcic_u64:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: blcicq %rdi, %rcx
-; BDVER4-NEXT: blcicq (%rsi), %rax
-; BDVER4-NEXT: addq %rcx, %rax
-; BDVER4-NEXT: retq
- %a1 = load i64, i64* %p1
- %r0 = xor i64 %a0, -1
- %m0 = xor i64 %a1, -1
- %r1 = add i64 %a0, 1
- %m1 = add i64 %a1, 1
- %r2 = and i64 %r1, %r0
- %m2 = and i64 %m1, %m0
- %res = add i64 %r2, %m2
- ret i64 %res
-}
-
-define i32 @test_x86_tbm_blcmsk_u32(i32 %a0, i32* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_blcmsk_u32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blcmskl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: blcmskl (%rsi), %eax # sched: [6:0.50]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_blcmsk_u32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blcmskl (%rsi), %eax # sched: [6:0.50]
-; BDVER2-NEXT: blcmskl %edi, %ecx # sched: [2:0.50]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_blcmsk_u32:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: blcmskl %edi, %ecx
-; BDVER3-NEXT: blcmskl (%rsi), %eax
-; BDVER3-NEXT: addl %ecx, %eax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_blcmsk_u32:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: blcmskl %edi, %ecx
-; BDVER4-NEXT: blcmskl (%rsi), %eax
-; BDVER4-NEXT: addl %ecx, %eax
-; BDVER4-NEXT: retq
- %a1 = load i32, i32* %p1
- %r0 = add i32 %a0, 1
- %m0 = add i32 %a1, 1
- %r1 = xor i32 %r0, %a0
- %m1 = xor i32 %m0, %a1
- %res = add i32 %r1, %m1
- ret i32 %res
-}
-
-define i64 @test_x86_tbm_blcmsk_u64(i64 %a0, i64* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_blcmsk_u64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blcmskq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: blcmskq (%rsi), %rax # sched: [6:0.50]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_blcmsk_u64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blcmskq (%rsi), %rax # sched: [6:0.50]
-; BDVER2-NEXT: blcmskq %rdi, %rcx # sched: [2:0.50]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_blcmsk_u64:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: blcmskq %rdi, %rcx
-; BDVER3-NEXT: blcmskq (%rsi), %rax
-; BDVER3-NEXT: addq %rcx, %rax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_blcmsk_u64:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: blcmskq %rdi, %rcx
-; BDVER4-NEXT: blcmskq (%rsi), %rax
-; BDVER4-NEXT: addq %rcx, %rax
-; BDVER4-NEXT: retq
- %a1 = load i64, i64* %p1
- %r0 = add i64 %a0, 1
- %m0 = add i64 %a1, 1
- %r1 = xor i64 %r0, %a0
- %m1 = xor i64 %m0, %a1
- %res = add i64 %r1, %m1
- ret i64 %res
-}
-
-define i32 @test_x86_tbm_blcs_u32(i32 %a0, i32* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_blcs_u32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blcsl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: blcsl (%rsi), %eax # sched: [6:0.50]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_blcs_u32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blcsl (%rsi), %eax # sched: [6:0.50]
-; BDVER2-NEXT: blcsl %edi, %ecx # sched: [2:0.50]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_blcs_u32:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: blcsl %edi, %ecx
-; BDVER3-NEXT: blcsl (%rsi), %eax
-; BDVER3-NEXT: addl %ecx, %eax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_blcs_u32:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: blcsl %edi, %ecx
-; BDVER4-NEXT: blcsl (%rsi), %eax
-; BDVER4-NEXT: addl %ecx, %eax
-; BDVER4-NEXT: retq
- %a1 = load i32, i32* %p1
- %r0 = add i32 %a0, 1
- %m0 = add i32 %a1, 1
- %r1 = or i32 %r0, %a0
- %m1 = or i32 %m0, %a1
- %res = add i32 %r1, %m1
- ret i32 %res
-}
-
-define i64 @test_x86_tbm_blcs_u64(i64 %a0, i64* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_blcs_u64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blcsq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: blcsq (%rsi), %rax # sched: [6:0.50]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_blcs_u64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blcsq (%rsi), %rax # sched: [6:0.50]
-; BDVER2-NEXT: blcsq %rdi, %rcx # sched: [2:0.50]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_blcs_u64:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: blcsq %rdi, %rcx
-; BDVER3-NEXT: blcsq (%rsi), %rax
-; BDVER3-NEXT: addq %rcx, %rax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_blcs_u64:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: blcsq %rdi, %rcx
-; BDVER4-NEXT: blcsq (%rsi), %rax
-; BDVER4-NEXT: addq %rcx, %rax
-; BDVER4-NEXT: retq
- %a1 = load i64, i64* %p1
- %r0 = add i64 %a0, 1
- %m0 = add i64 %a1, 1
- %r1 = or i64 %r0, %a0
- %m1 = or i64 %m0, %a1
- %res = add i64 %r1, %m1
- ret i64 %res
-}
-
-define i32 @test_x86_tbm_blsfill_u32(i32 %a0, i32* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_blsfill_u32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blsfilll %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: blsfilll (%rsi), %eax # sched: [6:0.50]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_blsfill_u32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blsfilll (%rsi), %eax # sched: [6:0.50]
-; BDVER2-NEXT: blsfilll %edi, %ecx # sched: [2:0.50]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_blsfill_u32:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: blsfilll %edi, %ecx
-; BDVER3-NEXT: blsfilll (%rsi), %eax
-; BDVER3-NEXT: addl %ecx, %eax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_blsfill_u32:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: blsfilll %edi, %ecx
-; BDVER4-NEXT: blsfilll (%rsi), %eax
-; BDVER4-NEXT: addl %ecx, %eax
-; BDVER4-NEXT: retq
- %a1 = load i32, i32* %p1
- %r0 = add i32 %a0, -1
- %m0 = add i32 %a1, -1
- %r1 = or i32 %r0, %a0
- %m1 = or i32 %m0, %a1
- %res = add i32 %r1, %m1
- ret i32 %res
-}
-
-define i64 @test_x86_tbm_blsfill_u64(i64 %a0, i64* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_blsfill_u64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blsfillq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: blsfillq (%rsi), %rax # sched: [6:0.50]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_blsfill_u64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blsfillq (%rsi), %rax # sched: [6:0.50]
-; BDVER2-NEXT: blsfillq %rdi, %rcx # sched: [2:0.50]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_blsfill_u64:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: blsfillq %rdi, %rcx
-; BDVER3-NEXT: blsfillq (%rsi), %rax
-; BDVER3-NEXT: addq %rcx, %rax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_blsfill_u64:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: blsfillq %rdi, %rcx
-; BDVER4-NEXT: blsfillq (%rsi), %rax
-; BDVER4-NEXT: addq %rcx, %rax
-; BDVER4-NEXT: retq
- %a1 = load i64, i64* %p1
- %r0 = add i64 %a0, -1
- %m0 = add i64 %a1, -1
- %r1 = or i64 %r0, %a0
- %m1 = or i64 %m0, %a1
- %res = add i64 %r1, %m1
- ret i64 %res
-}
-
-define i32 @test_x86_tbm_blsic_u32(i32 %a0, i32* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_blsic_u32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blsicl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: blsicl (%rsi), %eax # sched: [6:0.50]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_blsic_u32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blsicl (%rsi), %eax # sched: [6:0.50]
-; BDVER2-NEXT: blsicl %edi, %ecx # sched: [2:0.50]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_blsic_u32:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: blsicl %edi, %ecx
-; BDVER3-NEXT: blsicl (%rsi), %eax
-; BDVER3-NEXT: addl %ecx, %eax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_blsic_u32:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: blsicl %edi, %ecx
-; BDVER4-NEXT: blsicl (%rsi), %eax
-; BDVER4-NEXT: addl %ecx, %eax
-; BDVER4-NEXT: retq
- %a1 = load i32, i32* %p1
- %r0 = xor i32 %a0, -1
- %m0 = xor i32 %a1, -1
- %r1 = add i32 %a0, -1
- %m1 = add i32 %a1, -1
- %r2 = or i32 %r0, %r1
- %m2 = or i32 %m0, %m1
- %res = add i32 %r2, %m2
- ret i32 %res
-}
-
-define i64 @test_x86_tbm_blsic_u64(i64 %a0, i64* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_blsic_u64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: blsicq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: blsicq (%rsi), %rax # sched: [6:0.50]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_blsic_u64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: blsicq (%rsi), %rax # sched: [6:0.50]
-; BDVER2-NEXT: blsicq %rdi, %rcx # sched: [2:0.50]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_blsic_u64:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: blsicq %rdi, %rcx
-; BDVER3-NEXT: blsicq (%rsi), %rax
-; BDVER3-NEXT: addq %rcx, %rax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_blsic_u64:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: blsicq %rdi, %rcx
-; BDVER4-NEXT: blsicq (%rsi), %rax
-; BDVER4-NEXT: addq %rcx, %rax
-; BDVER4-NEXT: retq
- %a1 = load i64, i64* %p1
- %r0 = xor i64 %a0, -1
- %m0 = xor i64 %a1, -1
- %r1 = add i64 %a0, -1
- %m1 = add i64 %a1, -1
- %r2 = or i64 %r0, %r1
- %m2 = or i64 %m0, %m1
- %res = add i64 %r2, %m2
- ret i64 %res
-}
-
-define i32 @test_x86_tbm_t1mskc_u32(i32 %a0, i32* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_t1mskc_u32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: t1mskcl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: t1mskcl (%rsi), %eax # sched: [6:0.50]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_t1mskc_u32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: t1mskcl (%rsi), %eax # sched: [6:0.50]
-; BDVER2-NEXT: t1mskcl %edi, %ecx # sched: [2:0.50]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_t1mskc_u32:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: t1mskcl %edi, %ecx
-; BDVER3-NEXT: t1mskcl (%rsi), %eax
-; BDVER3-NEXT: addl %ecx, %eax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_t1mskc_u32:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: t1mskcl %edi, %ecx
-; BDVER4-NEXT: t1mskcl (%rsi), %eax
-; BDVER4-NEXT: addl %ecx, %eax
-; BDVER4-NEXT: retq
- %a1 = load i32, i32* %p1
- %r0 = xor i32 %a0, -1
- %m0 = xor i32 %a1, -1
- %r1 = add i32 %a0, 1
- %m1 = add i32 %a1, 1
- %r2 = or i32 %r0, %r1
- %m2 = or i32 %m0, %m1
- %res = add i32 %r2, %m2
- ret i32 %res
-}
-
-define i64 @test_x86_tbm_t1mskc_u64(i64 %a0, i64* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_t1mskc_u64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: t1mskcq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: t1mskcq (%rsi), %rax # sched: [6:0.50]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_t1mskc_u64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: t1mskcq (%rsi), %rax # sched: [6:0.50]
-; BDVER2-NEXT: t1mskcq %rdi, %rcx # sched: [2:0.50]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_t1mskc_u64:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: t1mskcq %rdi, %rcx
-; BDVER3-NEXT: t1mskcq (%rsi), %rax
-; BDVER3-NEXT: addq %rcx, %rax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_t1mskc_u64:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: t1mskcq %rdi, %rcx
-; BDVER4-NEXT: t1mskcq (%rsi), %rax
-; BDVER4-NEXT: addq %rcx, %rax
-; BDVER4-NEXT: retq
- %a1 = load i64, i64* %p1
- %r0 = xor i64 %a0, -1
- %m0 = xor i64 %a1, -1
- %r1 = add i64 %a0, 1
- %m1 = add i64 %a1, 1
- %r2 = or i64 %r0, %r1
- %m2 = or i64 %m0, %m1
- %res = add i64 %r2, %m2
- ret i64 %res
-}
-
-define i32 @test_x86_tbm_tzmsk_u32(i32 %a0, i32* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_tzmsk_u32:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: tzmskl %edi, %ecx # sched: [1:0.33]
-; GENERIC-NEXT: tzmskl (%rsi), %eax # sched: [6:0.50]
-; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_tzmsk_u32:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: tzmskl (%rsi), %eax # sched: [6:0.50]
-; BDVER2-NEXT: tzmskl %edi, %ecx # sched: [2:0.50]
-; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_tzmsk_u32:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: tzmskl %edi, %ecx
-; BDVER3-NEXT: tzmskl (%rsi), %eax
-; BDVER3-NEXT: addl %ecx, %eax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_tzmsk_u32:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: tzmskl %edi, %ecx
-; BDVER4-NEXT: tzmskl (%rsi), %eax
-; BDVER4-NEXT: addl %ecx, %eax
-; BDVER4-NEXT: retq
- %a1 = load i32, i32* %p1
- %r0 = xor i32 %a0, -1
- %m0 = xor i32 %a1, -1
- %r1 = add i32 %a0, -1
- %m1 = add i32 %a1, -1
- %r2 = and i32 %r0, %r1
- %m2 = and i32 %m0, %m1
- %res = add i32 %r2, %m2
- ret i32 %res
-}
-
-define i64 @test_x86_tbm_tzmsk_u64(i64 %a0, i64* nocapture %p1) nounwind {
-; GENERIC-LABEL: test_x86_tbm_tzmsk_u64:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: tzmskq %rdi, %rcx # sched: [1:0.33]
-; GENERIC-NEXT: tzmskq (%rsi), %rax # sched: [6:0.50]
-; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER2-LABEL: test_x86_tbm_tzmsk_u64:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: tzmskq (%rsi), %rax # sched: [6:0.50]
-; BDVER2-NEXT: tzmskq %rdi, %rcx # sched: [2:0.50]
-; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
-; BDVER2-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_x86_tbm_tzmsk_u64:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: tzmskq %rdi, %rcx
-; BDVER3-NEXT: tzmskq (%rsi), %rax
-; BDVER3-NEXT: addq %rcx, %rax
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_x86_tbm_tzmsk_u64:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: tzmskq %rdi, %rcx
-; BDVER4-NEXT: tzmskq (%rsi), %rax
-; BDVER4-NEXT: addq %rcx, %rax
-; BDVER4-NEXT: retq
- %a1 = load i64, i64* %p1
- %r0 = xor i64 %a0, -1
- %m0 = xor i64 %a1, -1
- %r1 = add i64 %a0, -1
- %m1 = add i64 %a1, -1
- %r2 = and i64 %r0, %r1
- %m2 = and i64 %m0, %m1
- %res = add i64 %r2, %m2
- ret i64 %res
-}
diff --git a/llvm/test/CodeGen/X86/x87-schedule.ll b/llvm/test/CodeGen/X86/x87-schedule.ll
deleted file mode 100644
index 383fb412ef2..00000000000
--- a/llvm/test/CodeGen/X86/x87-schedule.ll
+++ /dev/null
@@ -1,6420 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=i686 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
-
-define void @test_f2xm1() optsize {
-; GENERIC-LABEL: test_f2xm1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: f2xm1
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_f2xm1:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: f2xm1 # sched: [99:49.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_f2xm1:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: f2xm1 # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_f2xm1:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: f2xm1 # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_f2xm1:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: f2xm1 # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_f2xm1:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: f2xm1 # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_f2xm1:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: f2xm1 # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_f2xm1:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: f2xm1 # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_f2xm1:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: f2xm1 # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_f2xm1:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: f2xm1 # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_f2xm1:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: f2xm1 # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "f2xm1", ""() nounwind
- ret void
-}
-
-define void @test_fabs() optsize {
-; GENERIC-LABEL: test_fabs:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fabs
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fabs:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fabs # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fabs:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fabs # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fabs:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fabs # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fabs:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fabs # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fabs:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fabs # sched: [1:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fabs:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fabs # sched: [1:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fabs:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fabs # sched: [1:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fabs:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fabs # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fabs:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fabs # sched: [2:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fabs:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fabs # sched: [2:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fabs", ""() nounwind
- ret void
-}
-
-define void @test_fadd(float *%a0, double *%a1) optsize {
-; GENERIC-LABEL: test_fadd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fadd %st, %st(1)
-; GENERIC-NEXT: fadd %st(2)
-; GENERIC-NEXT: fadds (%ecx)
-; GENERIC-NEXT: faddl (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fadd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fadd %st, %st(1) # sched: [5:5.00]
-; ATOM-NEXT: fadd %st(2) # sched: [5:5.00]
-; ATOM-NEXT: fadds (%ecx) # sched: [5:5.00]
-; ATOM-NEXT: faddl (%eax) # sched: [5:5.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fadd:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fadd %st, %st(1) # sched: [3:1.00]
-; SLM-NEXT: fadd %st(2) # sched: [3:1.00]
-; SLM-NEXT: fadds (%ecx) # sched: [6:1.00]
-; SLM-NEXT: faddl (%eax) # sched: [6:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fadd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fadd %st, %st(1) # sched: [3:1.00]
-; SANDY-NEXT: fadd %st(2) # sched: [3:1.00]
-; SANDY-NEXT: fadds (%ecx) # sched: [10:1.00]
-; SANDY-NEXT: faddl (%eax) # sched: [10:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fadd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fadd %st, %st(1) # sched: [3:1.00]
-; HASWELL-NEXT: fadd %st(2) # sched: [3:1.00]
-; HASWELL-NEXT: fadds (%ecx) # sched: [10:1.00]
-; HASWELL-NEXT: faddl (%eax) # sched: [10:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fadd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fadd %st, %st(1) # sched: [3:1.00]
-; BROADWELL-NEXT: fadd %st(2) # sched: [3:1.00]
-; BROADWELL-NEXT: fadds (%ecx) # sched: [9:1.00]
-; BROADWELL-NEXT: faddl (%eax) # sched: [9:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fadd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fadd %st, %st(1) # sched: [3:1.00]
-; SKYLAKE-NEXT: fadd %st(2) # sched: [3:1.00]
-; SKYLAKE-NEXT: fadds (%ecx) # sched: [10:1.00]
-; SKYLAKE-NEXT: faddl (%eax) # sched: [10:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fadd:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fadd %st, %st(1) # sched: [3:1.00]
-; SKX-NEXT: fadd %st(2) # sched: [3:1.00]
-; SKX-NEXT: fadds (%ecx) # sched: [10:1.00]
-; SKX-NEXT: faddl (%eax) # sched: [10:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fadd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fadd %st, %st(1) # sched: [5:1.00]
-; BDVER2-NEXT: fadd %st(2) # sched: [5:1.00]
-; BDVER2-NEXT: fadds (%ecx) # sched: [10:1.00]
-; BDVER2-NEXT: faddl (%eax) # sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fadd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fadd %st, %st(1) # sched: [3:1.00]
-; BTVER2-NEXT: fadd %st(2) # sched: [3:1.00]
-; BTVER2-NEXT: fadds (%ecx) # sched: [8:1.00]
-; BTVER2-NEXT: faddl (%eax) # sched: [8:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fadd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fadd %st, %st(1) # sched: [3:1.00]
-; ZNVER1-NEXT: fadd %st(2) # sched: [3:1.00]
-; ZNVER1-NEXT: fadds (%ecx) # sched: [10:1.00]
-; ZNVER1-NEXT: faddl (%eax) # sched: [10:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fadd %st(0), %st(1) \0A\09 fadd %st(2), %st(0) \0A\09 fadds $0 \0A\09 faddl $1", "*m,*m"(float *%a0, double *%a1) nounwind
- ret void
-}
-
-define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize {
-; GENERIC-LABEL: test_faddp_fiadd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: faddp %st(1)
-; GENERIC-NEXT: faddp %st(2)
-; GENERIC-NEXT: fiadds (%ecx)
-; GENERIC-NEXT: fiaddl (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_faddp_fiadd:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: faddp %st(1) # sched: [5:5.00]
-; ATOM-NEXT: faddp %st(2) # sched: [5:5.00]
-; ATOM-NEXT: fiadds (%ecx) # sched: [5:5.00]
-; ATOM-NEXT: fiaddl (%eax) # sched: [5:5.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_faddp_fiadd:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: faddp %st(1) # sched: [3:1.00]
-; SLM-NEXT: faddp %st(2) # sched: [3:1.00]
-; SLM-NEXT: fiadds (%ecx) # sched: [6:1.00]
-; SLM-NEXT: fiaddl (%eax) # sched: [6:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_faddp_fiadd:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: faddp %st(1) # sched: [3:1.00]
-; SANDY-NEXT: faddp %st(2) # sched: [3:1.00]
-; SANDY-NEXT: fiadds (%ecx) # sched: [13:2.00]
-; SANDY-NEXT: fiaddl (%eax) # sched: [13:2.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_faddp_fiadd:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: faddp %st(1) # sched: [3:1.00]
-; HASWELL-NEXT: faddp %st(2) # sched: [3:1.00]
-; HASWELL-NEXT: fiadds (%ecx) # sched: [13:2.00]
-; HASWELL-NEXT: fiaddl (%eax) # sched: [13:2.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_faddp_fiadd:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: faddp %st(1) # sched: [3:1.00]
-; BROADWELL-NEXT: faddp %st(2) # sched: [3:1.00]
-; BROADWELL-NEXT: fiadds (%ecx) # sched: [12:2.00]
-; BROADWELL-NEXT: fiaddl (%eax) # sched: [12:2.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_faddp_fiadd:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: faddp %st(1) # sched: [3:1.00]
-; SKYLAKE-NEXT: faddp %st(2) # sched: [3:1.00]
-; SKYLAKE-NEXT: fiadds (%ecx) # sched: [13:2.00]
-; SKYLAKE-NEXT: fiaddl (%eax) # sched: [13:2.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_faddp_fiadd:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: faddp %st(1) # sched: [3:1.00]
-; SKX-NEXT: faddp %st(2) # sched: [3:1.00]
-; SKX-NEXT: fiadds (%ecx) # sched: [13:2.00]
-; SKX-NEXT: fiaddl (%eax) # sched: [13:2.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_faddp_fiadd:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: faddp %st(1) # sched: [5:1.00]
-; BDVER2-NEXT: faddp %st(2) # sched: [5:1.00]
-; BDVER2-NEXT: fiadds (%ecx) # sched: [10:1.00]
-; BDVER2-NEXT: fiaddl (%eax) # sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_faddp_fiadd:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: faddp %st(1) # sched: [3:1.00]
-; BTVER2-NEXT: faddp %st(2) # sched: [3:1.00]
-; BTVER2-NEXT: fiadds (%ecx) # sched: [8:1.00]
-; BTVER2-NEXT: fiaddl (%eax) # sched: [8:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_faddp_fiadd:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: faddp %st(1) # sched: [3:1.00]
-; ZNVER1-NEXT: faddp %st(2) # sched: [3:1.00]
-; ZNVER1-NEXT: fiadds (%ecx) # sched: [10:1.00]
-; ZNVER1-NEXT: fiaddl (%eax) # sched: [10:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "faddp \0A\09 faddp %st(2), %st(0) \0A\09 fiadds $0 \0A\09 fiaddl $1", "*m,*m"(i16 *%a0, i32 *%a1) nounwind
- ret void
-}
-
-define void @test_fbld_fbstp(i8* %a0) optsize {
-; GENERIC-LABEL: test_fbld_fbstp:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fbld (%eax)
-; GENERIC-NEXT: fbstp (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fbld_fbstp:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fbld (%eax) # sched: [100:0.50]
-; ATOM-NEXT: fbstp (%eax) # sched: [100:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fbld_fbstp:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fbld (%eax) # sched: [100:1.00]
-; SLM-NEXT: fbstp (%eax) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fbld_fbstp:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fbld (%eax) # sched: [100:0.33]
-; SANDY-NEXT: fbstp (%eax) # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fbld_fbstp:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fbld (%eax) # sched: [47:10.75]
-; HASWELL-NEXT: fbstp (%eax) # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fbld_fbstp:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fbld (%eax) # sched: [100:0.25]
-; BROADWELL-NEXT: fbstp (%eax) # sched: [1:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fbld_fbstp:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fbld (%eax) # sched: [100:0.25]
-; SKYLAKE-NEXT: fbstp (%eax) # sched: [1:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fbld_fbstp:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fbld (%eax) # sched: [100:0.25]
-; SKX-NEXT: fbstp (%eax) # sched: [1:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fbld_fbstp:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fbld (%eax) # sched: [100:0.50]
-; BDVER2-NEXT: fbstp (%eax) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fbld_fbstp:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fbld (%eax) # sched: [100:0.50]
-; BTVER2-NEXT: fbstp (%eax) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fbld_fbstp:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fbld (%eax) # sched: [100:0.25]
-; ZNVER1-NEXT: fbstp (%eax) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fbld $0 \0A\09 fbstp $0", "*m"(i8 *%a0) nounwind
- ret void
-}
-
-define void @test_fchs() optsize {
-; GENERIC-LABEL: test_fchs:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fchs
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fchs:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fchs # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fchs:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fchs # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fchs:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fchs # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fchs:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fchs # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fchs:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fchs # sched: [1:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fchs:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fchs # sched: [1:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fchs:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fchs # sched: [1:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fchs:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fchs # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fchs:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fchs # sched: [2:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fchs:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fchs # sched: [1:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fchs", ""() nounwind
- ret void
-}
-
-define void @test_fclex() optsize {
-; GENERIC-LABEL: test_fclex:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: wait
-; GENERIC-NEXT: fnclex
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fclex:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: wait # sched: [1:0.50]
-; ATOM-NEXT: fnclex # sched: [25:12.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fclex:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: wait # sched: [100:1.00]
-; SLM-NEXT: fnclex # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fclex:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: wait # sched: [100:0.33]
-; SANDY-NEXT: fnclex # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fclex:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: wait # sched: [2:0.50]
-; HASWELL-NEXT: fnclex # sched: [4:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fclex:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: wait # sched: [2:0.50]
-; BROADWELL-NEXT: fnclex # sched: [4:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fclex:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: wait # sched: [2:0.50]
-; SKYLAKE-NEXT: fnclex # sched: [4:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fclex:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: wait # sched: [2:0.50]
-; SKX-NEXT: fnclex # sched: [4:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fclex:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: wait # sched: [100:0.50]
-; BDVER2-NEXT: fnclex # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fclex:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: wait # sched: [100:0.50]
-; BTVER2-NEXT: fnclex # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fclex:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: wait # sched: [1:1.00]
-; ZNVER1-NEXT: fnclex # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fclex", ""() nounwind
- ret void
-}
-
-define void @test_fnclex() optsize {
-; GENERIC-LABEL: test_fnclex:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fnclex
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fnclex:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fnclex # sched: [25:12.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fnclex:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fnclex # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fnclex:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fnclex # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fnclex:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fnclex # sched: [4:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fnclex:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fnclex # sched: [4:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fnclex:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fnclex # sched: [4:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fnclex:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fnclex # sched: [4:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fnclex:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fnclex # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fnclex:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fnclex # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fnclex:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fnclex # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fnclex", ""() nounwind
- ret void
-}
-
-define void @test_fcmov() optsize {
-; GENERIC-LABEL: test_fcmov:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fcmovb %st(1), %st
-; GENERIC-NEXT: fcmovbe %st(1), %st
-; GENERIC-NEXT: fcmove %st(1), %st
-; GENERIC-NEXT: fcmovnb %st(1), %st
-; GENERIC-NEXT: fcmovnbe %st(1), %st
-; GENERIC-NEXT: fcmovne %st(1), %st
-; GENERIC-NEXT: fcmovnu %st(1), %st
-; GENERIC-NEXT: fcmovu %st(1), %st
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fcmov:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fcmovb %st(1), %st # sched: [9:4.50]
-; ATOM-NEXT: fcmovbe %st(1), %st # sched: [9:4.50]
-; ATOM-NEXT: fcmove %st(1), %st # sched: [9:4.50]
-; ATOM-NEXT: fcmovnb %st(1), %st # sched: [9:4.50]
-; ATOM-NEXT: fcmovnbe %st(1), %st # sched: [9:4.50]
-; ATOM-NEXT: fcmovne %st(1), %st # sched: [9:4.50]
-; ATOM-NEXT: fcmovnu %st(1), %st # sched: [9:4.50]
-; ATOM-NEXT: fcmovu %st(1), %st # sched: [9:4.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fcmov:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fcmovb %st(1), %st # sched: [3:1.00]
-; SLM-NEXT: fcmovbe %st(1), %st # sched: [3:1.00]
-; SLM-NEXT: fcmove %st(1), %st # sched: [3:1.00]
-; SLM-NEXT: fcmovnb %st(1), %st # sched: [3:1.00]
-; SLM-NEXT: fcmovnbe %st(1), %st # sched: [3:1.00]
-; SLM-NEXT: fcmovne %st(1), %st # sched: [3:1.00]
-; SLM-NEXT: fcmovnu %st(1), %st # sched: [3:1.00]
-; SLM-NEXT: fcmovu %st(1), %st # sched: [3:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fcmov:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fcmovb %st(1), %st # sched: [3:2.00]
-; SANDY-NEXT: fcmovbe %st(1), %st # sched: [3:2.00]
-; SANDY-NEXT: fcmove %st(1), %st # sched: [3:2.00]
-; SANDY-NEXT: fcmovnb %st(1), %st # sched: [3:2.00]
-; SANDY-NEXT: fcmovnbe %st(1), %st # sched: [3:2.00]
-; SANDY-NEXT: fcmovne %st(1), %st # sched: [3:2.00]
-; SANDY-NEXT: fcmovnu %st(1), %st # sched: [3:2.00]
-; SANDY-NEXT: fcmovu %st(1), %st # sched: [3:2.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fcmov:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fcmovb %st(1), %st # sched: [3:1.00]
-; HASWELL-NEXT: fcmovbe %st(1), %st # sched: [3:1.00]
-; HASWELL-NEXT: fcmove %st(1), %st # sched: [3:1.00]
-; HASWELL-NEXT: fcmovnb %st(1), %st # sched: [3:1.00]
-; HASWELL-NEXT: fcmovnbe %st(1), %st # sched: [3:1.00]
-; HASWELL-NEXT: fcmovne %st(1), %st # sched: [3:1.00]
-; HASWELL-NEXT: fcmovnu %st(1), %st # sched: [3:1.00]
-; HASWELL-NEXT: fcmovu %st(1), %st # sched: [3:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fcmov:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fcmovb %st(1), %st # sched: [3:1.00]
-; BROADWELL-NEXT: fcmovbe %st(1), %st # sched: [3:1.00]
-; BROADWELL-NEXT: fcmove %st(1), %st # sched: [3:1.00]
-; BROADWELL-NEXT: fcmovnb %st(1), %st # sched: [3:1.00]
-; BROADWELL-NEXT: fcmovnbe %st(1), %st # sched: [3:1.00]
-; BROADWELL-NEXT: fcmovne %st(1), %st # sched: [3:1.00]
-; BROADWELL-NEXT: fcmovnu %st(1), %st # sched: [3:1.00]
-; BROADWELL-NEXT: fcmovu %st(1), %st # sched: [3:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fcmov:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fcmovb %st(1), %st # sched: [3:1.00]
-; SKYLAKE-NEXT: fcmovbe %st(1), %st # sched: [3:1.00]
-; SKYLAKE-NEXT: fcmove %st(1), %st # sched: [3:1.00]
-; SKYLAKE-NEXT: fcmovnb %st(1), %st # sched: [3:1.00]
-; SKYLAKE-NEXT: fcmovnbe %st(1), %st # sched: [3:1.00]
-; SKYLAKE-NEXT: fcmovne %st(1), %st # sched: [3:1.00]
-; SKYLAKE-NEXT: fcmovnu %st(1), %st # sched: [3:1.00]
-; SKYLAKE-NEXT: fcmovu %st(1), %st # sched: [3:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fcmov:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fcmovb %st(1), %st # sched: [3:1.00]
-; SKX-NEXT: fcmovbe %st(1), %st # sched: [3:1.00]
-; SKX-NEXT: fcmove %st(1), %st # sched: [3:1.00]
-; SKX-NEXT: fcmovnb %st(1), %st # sched: [3:1.00]
-; SKX-NEXT: fcmovnbe %st(1), %st # sched: [3:1.00]
-; SKX-NEXT: fcmovne %st(1), %st # sched: [3:1.00]
-; SKX-NEXT: fcmovnu %st(1), %st # sched: [3:1.00]
-; SKX-NEXT: fcmovu %st(1), %st # sched: [3:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fcmov:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fcmovb %st(1), %st # sched: [1:1.00]
-; BDVER2-NEXT: fcmovbe %st(1), %st # sched: [1:1.00]
-; BDVER2-NEXT: fcmove %st(1), %st # sched: [1:1.00]
-; BDVER2-NEXT: fcmovnb %st(1), %st # sched: [1:1.00]
-; BDVER2-NEXT: fcmovnbe %st(1), %st # sched: [1:1.00]
-; BDVER2-NEXT: fcmovne %st(1), %st # sched: [1:1.00]
-; BDVER2-NEXT: fcmovnu %st(1), %st # sched: [1:1.00]
-; BDVER2-NEXT: fcmovu %st(1), %st # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fcmov:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fcmovb %st(1), %st # sched: [3:1.00]
-; BTVER2-NEXT: fcmovbe %st(1), %st # sched: [3:1.00]
-; BTVER2-NEXT: fcmove %st(1), %st # sched: [3:1.00]
-; BTVER2-NEXT: fcmovnb %st(1), %st # sched: [3:1.00]
-; BTVER2-NEXT: fcmovnbe %st(1), %st # sched: [3:1.00]
-; BTVER2-NEXT: fcmovne %st(1), %st # sched: [3:1.00]
-; BTVER2-NEXT: fcmovnu %st(1), %st # sched: [3:1.00]
-; BTVER2-NEXT: fcmovu %st(1), %st # sched: [3:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fcmov:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fcmovb %st(1), %st # sched: [100:0.25]
-; ZNVER1-NEXT: fcmovbe %st(1), %st # sched: [100:0.25]
-; ZNVER1-NEXT: fcmove %st(1), %st # sched: [100:0.25]
-; ZNVER1-NEXT: fcmovnb %st(1), %st # sched: [100:0.25]
-; ZNVER1-NEXT: fcmovnbe %st(1), %st # sched: [100:0.25]
-; ZNVER1-NEXT: fcmovne %st(1), %st # sched: [100:0.25]
-; ZNVER1-NEXT: fcmovnu %st(1), %st # sched: [100:0.25]
-; ZNVER1-NEXT: fcmovu %st(1), %st # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fcmovb %st(1), %st(0) \0A\09 fcmovbe %st(1), %st(0) \0A\09 fcmove %st(1), %st(0) \0A\09 fcmovnb %st(1), %st(0) \0A\09 fcmovnbe %st(1), %st(0) \0A\09 fcmovne %st(1), %st(0) \0A\09 fcmovnu %st(1), %st(0) \0A\09 fcmovu %st(1), %st(0)", ""() nounwind
- ret void
-}
-
-define void @test_fcom(float *%a0, double *%a1) optsize {
-; GENERIC-LABEL: test_fcom:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fcom %st(1)
-; GENERIC-NEXT: fcom %st(3)
-; GENERIC-NEXT: fcoms (%ecx)
-; GENERIC-NEXT: fcoml (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fcom:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fcom %st(1) # sched: [5:5.00]
-; ATOM-NEXT: fcom %st(3) # sched: [5:5.00]
-; ATOM-NEXT: fcoms (%ecx) # sched: [5:5.00]
-; ATOM-NEXT: fcoml (%eax) # sched: [5:5.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fcom:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fcom %st(1) # sched: [3:1.00]
-; SLM-NEXT: fcom %st(3) # sched: [3:1.00]
-; SLM-NEXT: fcoms (%ecx) # sched: [6:1.00]
-; SLM-NEXT: fcoml (%eax) # sched: [6:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fcom:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fcom %st(1) # sched: [1:1.00]
-; SANDY-NEXT: fcom %st(3) # sched: [1:1.00]
-; SANDY-NEXT: fcoms (%ecx) # sched: [8:1.00]
-; SANDY-NEXT: fcoml (%eax) # sched: [8:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fcom:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fcom %st(1) # sched: [1:1.00]
-; HASWELL-NEXT: fcom %st(3) # sched: [1:1.00]
-; HASWELL-NEXT: fcoms (%ecx) # sched: [8:1.00]
-; HASWELL-NEXT: fcoml (%eax) # sched: [8:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fcom:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fcom %st(1) # sched: [1:1.00]
-; BROADWELL-NEXT: fcom %st(3) # sched: [1:1.00]
-; BROADWELL-NEXT: fcoms (%ecx) # sched: [7:1.00]
-; BROADWELL-NEXT: fcoml (%eax) # sched: [7:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fcom:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fcom %st(1) # sched: [1:1.00]
-; SKYLAKE-NEXT: fcom %st(3) # sched: [1:1.00]
-; SKYLAKE-NEXT: fcoms (%ecx) # sched: [8:1.00]
-; SKYLAKE-NEXT: fcoml (%eax) # sched: [8:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fcom:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fcom %st(1) # sched: [1:1.00]
-; SKX-NEXT: fcom %st(3) # sched: [1:1.00]
-; SKX-NEXT: fcoms (%ecx) # sched: [8:1.00]
-; SKX-NEXT: fcoml (%eax) # sched: [8:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fcom:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fcom %st(1) # sched: [1:1.00]
-; BDVER2-NEXT: fcom %st(3) # sched: [1:1.00]
-; BDVER2-NEXT: fcoms (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: fcoml (%eax) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fcom:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fcom %st(1) # sched: [3:1.00]
-; BTVER2-NEXT: fcom %st(3) # sched: [3:1.00]
-; BTVER2-NEXT: fcoms (%ecx) # sched: [8:1.00]
-; BTVER2-NEXT: fcoml (%eax) # sched: [8:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fcom:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fcom %st(1) # sched: [1:1.00]
-; ZNVER1-NEXT: fcom %st(3) # sched: [1:1.00]
-; ZNVER1-NEXT: fcoms (%ecx) # sched: [8:1.00]
-; ZNVER1-NEXT: fcoml (%eax) # sched: [8:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fcom \0A\09 fcom %st(3) \0A\09 fcoms $0 \0A\09 fcoml $1", "*m,*m"(float *%a0, double *%a1) nounwind
- ret void
-}
-
-define void @test_fcomp_fcompp(float *%a0, double *%a1) optsize {
-; GENERIC-LABEL: test_fcomp_fcompp:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fcomp %st(1)
-; GENERIC-NEXT: fcomp %st(3)
-; GENERIC-NEXT: fcomps (%ecx)
-; GENERIC-NEXT: fcompl (%eax)
-; GENERIC-NEXT: fcompp
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fcomp_fcompp:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fcomp %st(1) # sched: [5:5.00]
-; ATOM-NEXT: fcomp %st(3) # sched: [5:5.00]
-; ATOM-NEXT: fcomps (%ecx) # sched: [5:5.00]
-; ATOM-NEXT: fcompl (%eax) # sched: [5:5.00]
-; ATOM-NEXT: fcompp # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fcomp_fcompp:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fcomp %st(1) # sched: [3:1.00]
-; SLM-NEXT: fcomp %st(3) # sched: [3:1.00]
-; SLM-NEXT: fcomps (%ecx) # sched: [6:1.00]
-; SLM-NEXT: fcompl (%eax) # sched: [6:1.00]
-; SLM-NEXT: fcompp # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fcomp_fcompp:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fcomp %st(1) # sched: [1:1.00]
-; SANDY-NEXT: fcomp %st(3) # sched: [1:1.00]
-; SANDY-NEXT: fcomps (%ecx) # sched: [8:1.00]
-; SANDY-NEXT: fcompl (%eax) # sched: [8:1.00]
-; SANDY-NEXT: fcompp # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fcomp_fcompp:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fcomp %st(1) # sched: [1:1.00]
-; HASWELL-NEXT: fcomp %st(3) # sched: [1:1.00]
-; HASWELL-NEXT: fcomps (%ecx) # sched: [8:1.00]
-; HASWELL-NEXT: fcompl (%eax) # sched: [8:1.00]
-; HASWELL-NEXT: fcompp # sched: [1:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fcomp_fcompp:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fcomp %st(1) # sched: [1:1.00]
-; BROADWELL-NEXT: fcomp %st(3) # sched: [1:1.00]
-; BROADWELL-NEXT: fcomps (%ecx) # sched: [7:1.00]
-; BROADWELL-NEXT: fcompl (%eax) # sched: [7:1.00]
-; BROADWELL-NEXT: fcompp # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fcomp_fcompp:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fcomp %st(1) # sched: [1:1.00]
-; SKYLAKE-NEXT: fcomp %st(3) # sched: [1:1.00]
-; SKYLAKE-NEXT: fcomps (%ecx) # sched: [8:1.00]
-; SKYLAKE-NEXT: fcompl (%eax) # sched: [8:1.00]
-; SKYLAKE-NEXT: fcompp # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fcomp_fcompp:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fcomp %st(1) # sched: [1:1.00]
-; SKX-NEXT: fcomp %st(3) # sched: [1:1.00]
-; SKX-NEXT: fcomps (%ecx) # sched: [8:1.00]
-; SKX-NEXT: fcompl (%eax) # sched: [8:1.00]
-; SKX-NEXT: fcompp # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fcomp_fcompp:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fcomp %st(1) # sched: [1:1.00]
-; BDVER2-NEXT: fcomp %st(3) # sched: [1:1.00]
-; BDVER2-NEXT: fcomps (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: fcompl (%eax) # sched: [6:1.00]
-; BDVER2-NEXT: fcompp # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fcomp_fcompp:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fcomp %st(1) # sched: [3:1.00]
-; BTVER2-NEXT: fcomp %st(3) # sched: [3:1.00]
-; BTVER2-NEXT: fcomps (%ecx) # sched: [8:1.00]
-; BTVER2-NEXT: fcompl (%eax) # sched: [8:1.00]
-; BTVER2-NEXT: fcompp # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fcomp_fcompp:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fcomp %st(1) # sched: [1:1.00]
-; ZNVER1-NEXT: fcomp %st(3) # sched: [1:1.00]
-; ZNVER1-NEXT: fcomps (%ecx) # sched: [8:1.00]
-; ZNVER1-NEXT: fcompl (%eax) # sched: [8:1.00]
-; ZNVER1-NEXT: fcompp # sched: [1:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fcomp \0A\09 fcomp %st(3) \0A\09 fcomps $0 \0A\09 fcompl $1 \0A\09 fcompp", "*m,*m"(float *%a0, double *%a1) nounwind
- ret void
-}
-
-define void @test_fcomi_fcomip() optsize {
-; GENERIC-LABEL: test_fcomi_fcomip:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fcomi %st(3)
-; GENERIC-NEXT: fcompi %st(3)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fcomi_fcomip:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fcomi %st(3) # sched: [9:4.50]
-; ATOM-NEXT: fcompi %st(3) # sched: [9:4.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fcomi_fcomip:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fcomi %st(3) # sched: [3:1.00]
-; SLM-NEXT: fcompi %st(3) # sched: [3:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fcomi_fcomip:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fcomi %st(3) # sched: [3:1.00]
-; SANDY-NEXT: fcompi %st(3) # sched: [3:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fcomi_fcomip:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fcomi %st(3) # sched: [1:0.50]
-; HASWELL-NEXT: fcompi %st(3) # sched: [1:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fcomi_fcomip:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fcomi %st(3) # sched: [3:1.00]
-; BROADWELL-NEXT: fcompi %st(3) # sched: [3:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fcomi_fcomip:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fcomi %st(3) # sched: [2:1.00]
-; SKYLAKE-NEXT: fcompi %st(3) # sched: [2:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fcomi_fcomip:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fcomi %st(3) # sched: [2:1.00]
-; SKX-NEXT: fcompi %st(3) # sched: [2:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fcomi_fcomip:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fcomi %st(3) # sched: [1:1.00]
-; BDVER2-NEXT: fcompi %st(3) # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fcomi_fcomip:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fcomi %st(3) # sched: [3:1.00]
-; BTVER2-NEXT: fcompi %st(3) # sched: [3:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fcomi_fcomip:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fcomi %st(3) # sched: [9:0.50]
-; ZNVER1-NEXT: fcompi %st(3) # sched: [9:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fcomi %st(3) \0A\09 fcomip %st(3)", ""() nounwind
- ret void
-}
-
-define void @test_fcos() optsize {
-; GENERIC-LABEL: test_fcos:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fcos
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fcos:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fcos # sched: [174:87.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fcos:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fcos # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fcos:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fcos # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fcos:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fcos # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fcos:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fcos # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fcos:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fcos # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fcos:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fcos # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fcos:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fcos # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fcos:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fcos # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fcos:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fcos # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fcos", ""() nounwind
- ret void
-}
-
-define void @test_fdecstp() optsize {
-; GENERIC-LABEL: test_fdecstp:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fdecstp
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fdecstp:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fdecstp # sched: [1:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fdecstp:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fdecstp # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fdecstp:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fdecstp # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fdecstp:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fdecstp # sched: [2:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fdecstp:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fdecstp # sched: [2:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fdecstp:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fdecstp # sched: [2:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fdecstp:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fdecstp # sched: [2:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fdecstp:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fdecstp # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fdecstp:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fdecstp # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fdecstp:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fdecstp # sched: [11:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fdecstp", ""() nounwind
- ret void
-}
-
-define void @test_fdiv(float *%a0, double *%a1) optsize {
-; GENERIC-LABEL: test_fdiv:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fdiv %st, %st(1)
-; GENERIC-NEXT: fdiv %st(2)
-; GENERIC-NEXT: fdivs (%ecx)
-; GENERIC-NEXT: fdivl (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fdiv:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fdiv %st, %st(1) # sched: [34:17.00]
-; ATOM-NEXT: fdiv %st(2) # sched: [34:17.00]
-; ATOM-NEXT: fdivs (%ecx) # sched: [34:17.00]
-; ATOM-NEXT: fdivl (%eax) # sched: [34:17.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fdiv:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fdiv %st, %st(1) # sched: [19:17.00]
-; SLM-NEXT: fdiv %st(2) # sched: [19:17.00]
-; SLM-NEXT: fdivs (%ecx) # sched: [22:17.00]
-; SLM-NEXT: fdivl (%eax) # sched: [22:17.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fdiv:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fdiv %st, %st(1) # sched: [14:14.00]
-; SANDY-NEXT: fdiv %st(2) # sched: [14:14.00]
-; SANDY-NEXT: fdivs (%ecx) # sched: [31:1.00]
-; SANDY-NEXT: fdivl (%eax) # sched: [31:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fdiv:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fdiv %st, %st(1) # sched: [24:1.00]
-; HASWELL-NEXT: fdiv %st(2) # sched: [20:1.00]
-; HASWELL-NEXT: fdivs (%ecx) # sched: [31:1.00]
-; HASWELL-NEXT: fdivl (%eax) # sched: [31:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fdiv:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fdiv %st, %st(1) # sched: [15:1.00]
-; BROADWELL-NEXT: fdiv %st(2) # sched: [20:1.00]
-; BROADWELL-NEXT: fdivs (%ecx) # sched: [21:1.00]
-; BROADWELL-NEXT: fdivl (%eax) # sched: [21:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fdiv:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fdiv %st, %st(1) # sched: [15:1.00]
-; SKYLAKE-NEXT: fdiv %st(2) # sched: [20:1.00]
-; SKYLAKE-NEXT: fdivs (%ecx) # sched: [22:1.00]
-; SKYLAKE-NEXT: fdivl (%eax) # sched: [22:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fdiv:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fdiv %st, %st(1) # sched: [15:1.00]
-; SKX-NEXT: fdiv %st(2) # sched: [20:1.00]
-; SKX-NEXT: fdivs (%ecx) # sched: [22:1.00]
-; SKX-NEXT: fdivl (%eax) # sched: [22:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fdiv:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fdiv %st, %st(1) # sched: [9:9.50]
-; BDVER2-NEXT: fdiv %st(2) # sched: [9:9.50]
-; BDVER2-NEXT: fdivs (%ecx) # sched: [14:9.50]
-; BDVER2-NEXT: fdivl (%eax) # sched: [14:9.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fdiv:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fdiv %st, %st(1) # sched: [19:19.00]
-; BTVER2-NEXT: fdiv %st(2) # sched: [19:19.00]
-; BTVER2-NEXT: fdivs (%ecx) # sched: [24:19.00]
-; BTVER2-NEXT: fdivl (%eax) # sched: [24:19.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fdiv:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fdiv %st, %st(1) # sched: [15:1.00]
-; ZNVER1-NEXT: fdiv %st(2) # sched: [15:1.00]
-; ZNVER1-NEXT: fdivs (%ecx) # sched: [22:1.00]
-; ZNVER1-NEXT: fdivl (%eax) # sched: [22:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fdiv %st(0), %st(1) \0A\09 fdiv %st(2), %st(0) \0A\09 fdivs $0 \0A\09 fdivl $1", "*m,*m"(float *%a0, double *%a1) nounwind
- ret void
-}
-
-define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
-; GENERIC-LABEL: test_fdivp_fidiv:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fdivp %st(1)
-; GENERIC-NEXT: fdivp %st(2)
-; GENERIC-NEXT: fidivs (%ecx)
-; GENERIC-NEXT: fidivl (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fdivp_fidiv:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fdivp %st(1) # sched: [34:17.00]
-; ATOM-NEXT: fdivp %st(2) # sched: [34:17.00]
-; ATOM-NEXT: fidivs (%ecx) # sched: [34:17.00]
-; ATOM-NEXT: fidivl (%eax) # sched: [34:17.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fdivp_fidiv:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fdivp %st(1) # sched: [19:17.00]
-; SLM-NEXT: fdivp %st(2) # sched: [19:17.00]
-; SLM-NEXT: fidivs (%ecx) # sched: [22:17.00]
-; SLM-NEXT: fidivl (%eax) # sched: [22:17.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fdivp_fidiv:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fdivp %st(1) # sched: [14:14.00]
-; SANDY-NEXT: fdivp %st(2) # sched: [14:14.00]
-; SANDY-NEXT: fidivs (%ecx) # sched: [34:1.00]
-; SANDY-NEXT: fidivl (%eax) # sched: [34:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fdivp_fidiv:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fdivp %st(1) # sched: [24:1.00]
-; HASWELL-NEXT: fdivp %st(2) # sched: [24:1.00]
-; HASWELL-NEXT: fidivs (%ecx) # sched: [34:1.00]
-; HASWELL-NEXT: fidivl (%eax) # sched: [34:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fdivp_fidiv:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fdivp %st(1) # sched: [15:1.00]
-; BROADWELL-NEXT: fdivp %st(2) # sched: [15:1.00]
-; BROADWELL-NEXT: fidivs (%ecx) # sched: [24:1.00]
-; BROADWELL-NEXT: fidivl (%eax) # sched: [24:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fdivp_fidiv:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fdivp %st(1) # sched: [15:1.00]
-; SKYLAKE-NEXT: fdivp %st(2) # sched: [15:1.00]
-; SKYLAKE-NEXT: fidivs (%ecx) # sched: [25:1.00]
-; SKYLAKE-NEXT: fidivl (%eax) # sched: [25:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fdivp_fidiv:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fdivp %st(1) # sched: [15:1.00]
-; SKX-NEXT: fdivp %st(2) # sched: [15:1.00]
-; SKX-NEXT: fidivs (%ecx) # sched: [25:1.00]
-; SKX-NEXT: fidivl (%eax) # sched: [25:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fdivp_fidiv:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fdivp %st(1) # sched: [9:9.50]
-; BDVER2-NEXT: fdivp %st(2) # sched: [9:9.50]
-; BDVER2-NEXT: fidivs (%ecx) # sched: [14:9.50]
-; BDVER2-NEXT: fidivl (%eax) # sched: [14:9.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fdivp_fidiv:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fdivp %st(1) # sched: [19:19.00]
-; BTVER2-NEXT: fdivp %st(2) # sched: [19:19.00]
-; BTVER2-NEXT: fidivs (%ecx) # sched: [24:19.00]
-; BTVER2-NEXT: fidivl (%eax) # sched: [24:19.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fdivp_fidiv:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fdivp %st(1) # sched: [15:1.00]
-; ZNVER1-NEXT: fdivp %st(2) # sched: [15:1.00]
-; ZNVER1-NEXT: fidivs (%ecx) # sched: [22:1.00]
-; ZNVER1-NEXT: fidivl (%eax) # sched: [22:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fdivp \0A\09 fdivp %st(2), %st(0) \0A\09 fidivs $0 \0A\09 fidivl $1", "*m,*m"(i16 *%a0, i32 *%a1) nounwind
- ret void
-}
-
-define void @test_fdivr(float *%a0, double *%a1) optsize {
-; GENERIC-LABEL: test_fdivr:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fdivr %st, %st(1)
-; GENERIC-NEXT: fdivr %st(2)
-; GENERIC-NEXT: fdivrs (%ecx)
-; GENERIC-NEXT: fdivrl (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fdivr:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fdivr %st, %st(1) # sched: [34:17.00]
-; ATOM-NEXT: fdivr %st(2) # sched: [34:17.00]
-; ATOM-NEXT: fdivrs (%ecx) # sched: [34:17.00]
-; ATOM-NEXT: fdivrl (%eax) # sched: [34:17.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fdivr:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fdivr %st, %st(1) # sched: [19:17.00]
-; SLM-NEXT: fdivr %st(2) # sched: [19:17.00]
-; SLM-NEXT: fdivrs (%ecx) # sched: [22:17.00]
-; SLM-NEXT: fdivrl (%eax) # sched: [22:17.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fdivr:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fdivr %st, %st(1) # sched: [14:14.00]
-; SANDY-NEXT: fdivr %st(2) # sched: [14:14.00]
-; SANDY-NEXT: fdivrs (%ecx) # sched: [31:1.00]
-; SANDY-NEXT: fdivrl (%eax) # sched: [31:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fdivr:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fdivr %st, %st(1) # sched: [20:1.00]
-; HASWELL-NEXT: fdivr %st(2) # sched: [24:1.00]
-; HASWELL-NEXT: fdivrs (%ecx) # sched: [27:1.00]
-; HASWELL-NEXT: fdivrl (%eax) # sched: [27:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fdivr:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fdivr %st, %st(1) # sched: [20:1.00]
-; BROADWELL-NEXT: fdivr %st(2) # sched: [15:1.00]
-; BROADWELL-NEXT: fdivrs (%ecx) # sched: [26:1.00]
-; BROADWELL-NEXT: fdivrl (%eax) # sched: [26:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fdivr:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fdivr %st, %st(1) # sched: [20:1.00]
-; SKYLAKE-NEXT: fdivr %st(2) # sched: [15:1.00]
-; SKYLAKE-NEXT: fdivrs (%ecx) # sched: [27:1.00]
-; SKYLAKE-NEXT: fdivrl (%eax) # sched: [27:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fdivr:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fdivr %st, %st(1) # sched: [20:1.00]
-; SKX-NEXT: fdivr %st(2) # sched: [15:1.00]
-; SKX-NEXT: fdivrs (%ecx) # sched: [27:1.00]
-; SKX-NEXT: fdivrl (%eax) # sched: [27:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fdivr:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fdivr %st, %st(1) # sched: [9:9.50]
-; BDVER2-NEXT: fdivr %st(2) # sched: [9:9.50]
-; BDVER2-NEXT: fdivrs (%ecx) # sched: [14:9.50]
-; BDVER2-NEXT: fdivrl (%eax) # sched: [14:9.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fdivr:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fdivr %st, %st(1) # sched: [19:19.00]
-; BTVER2-NEXT: fdivr %st(2) # sched: [19:19.00]
-; BTVER2-NEXT: fdivrs (%ecx) # sched: [24:19.00]
-; BTVER2-NEXT: fdivrl (%eax) # sched: [24:19.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fdivr:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fdivr %st, %st(1) # sched: [15:1.00]
-; ZNVER1-NEXT: fdivr %st(2) # sched: [15:1.00]
-; ZNVER1-NEXT: fdivrs (%ecx) # sched: [22:1.00]
-; ZNVER1-NEXT: fdivrl (%eax) # sched: [22:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fdivr %st(0), %st(1) \0A\09 fdivr %st(2), %st(0) \0A\09 fdivrs $0 \0A\09 fdivrl $1", "*m,*m"(float *%a0, double *%a1) nounwind
- ret void
-}
-
-define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
-; GENERIC-LABEL: test_fdivrp_fidivr:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fdivrp %st(1)
-; GENERIC-NEXT: fdivrp %st(2)
-; GENERIC-NEXT: fidivrs (%ecx)
-; GENERIC-NEXT: fidivrl (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fdivrp_fidivr:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fdivrp %st(1) # sched: [34:17.00]
-; ATOM-NEXT: fdivrp %st(2) # sched: [34:17.00]
-; ATOM-NEXT: fidivrs (%ecx) # sched: [34:17.00]
-; ATOM-NEXT: fidivrl (%eax) # sched: [34:17.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fdivrp_fidivr:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fdivrp %st(1) # sched: [19:17.00]
-; SLM-NEXT: fdivrp %st(2) # sched: [19:17.00]
-; SLM-NEXT: fidivrs (%ecx) # sched: [22:17.00]
-; SLM-NEXT: fidivrl (%eax) # sched: [22:17.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fdivrp_fidivr:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fdivrp %st(1) # sched: [14:14.00]
-; SANDY-NEXT: fdivrp %st(2) # sched: [14:14.00]
-; SANDY-NEXT: fidivrs (%ecx) # sched: [34:1.00]
-; SANDY-NEXT: fidivrl (%eax) # sched: [34:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fdivrp_fidivr:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fdivrp %st(1) # sched: [20:1.00]
-; HASWELL-NEXT: fdivrp %st(2) # sched: [20:1.00]
-; HASWELL-NEXT: fidivrs (%ecx) # sched: [30:1.00]
-; HASWELL-NEXT: fidivrl (%eax) # sched: [30:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fdivrp_fidivr:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fdivrp %st(1) # sched: [20:1.00]
-; BROADWELL-NEXT: fdivrp %st(2) # sched: [20:1.00]
-; BROADWELL-NEXT: fidivrs (%ecx) # sched: [29:1.00]
-; BROADWELL-NEXT: fidivrl (%eax) # sched: [29:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fdivrp_fidivr:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fdivrp %st(1) # sched: [20:1.00]
-; SKYLAKE-NEXT: fdivrp %st(2) # sched: [20:1.00]
-; SKYLAKE-NEXT: fidivrs (%ecx) # sched: [30:1.00]
-; SKYLAKE-NEXT: fidivrl (%eax) # sched: [30:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fdivrp_fidivr:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fdivrp %st(1) # sched: [20:1.00]
-; SKX-NEXT: fdivrp %st(2) # sched: [20:1.00]
-; SKX-NEXT: fidivrs (%ecx) # sched: [30:1.00]
-; SKX-NEXT: fidivrl (%eax) # sched: [30:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fdivrp_fidivr:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fdivrp %st(1) # sched: [9:9.50]
-; BDVER2-NEXT: fdivrp %st(2) # sched: [9:9.50]
-; BDVER2-NEXT: fidivrs (%ecx) # sched: [14:9.50]
-; BDVER2-NEXT: fidivrl (%eax) # sched: [14:9.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fdivrp_fidivr:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fdivrp %st(1) # sched: [19:19.00]
-; BTVER2-NEXT: fdivrp %st(2) # sched: [19:19.00]
-; BTVER2-NEXT: fidivrs (%ecx) # sched: [24:19.00]
-; BTVER2-NEXT: fidivrl (%eax) # sched: [24:19.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fdivrp_fidivr:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fdivrp %st(1) # sched: [15:1.00]
-; ZNVER1-NEXT: fdivrp %st(2) # sched: [15:1.00]
-; ZNVER1-NEXT: fidivrs (%ecx) # sched: [22:1.00]
-; ZNVER1-NEXT: fidivrl (%eax) # sched: [22:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fdivrp \0A\09 fdivrp %st(2), %st(0) \0A\09 fidivrs $0 \0A\09 fidivrl $1", "*m,*m"(i16 *%a0, i32 *%a1) nounwind
- ret void
-}
-
-define void @test_ffree() optsize {
-; GENERIC-LABEL: test_ffree:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: ffree %st(0)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_ffree:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: ffree %st(0) # sched: [1:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_ffree:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: ffree %st(0) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_ffree:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: ffree %st(0) # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_ffree:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: ffree %st(0) # sched: [1:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ffree:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: ffree %st(0) # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_ffree:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: ffree %st(0) # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_ffree:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: ffree %st(0) # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_ffree:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: ffree %st(0) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_ffree:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: ffree %st(0) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ffree:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: ffree %st(0) # sched: [11:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "ffree %st(0)", ""() nounwind
- ret void
-}
-
-define void @test_ficom(i16 *%a0, i32 *%a1) optsize {
-; GENERIC-LABEL: test_ficom:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: ficoms (%ecx)
-; GENERIC-NEXT: ficoml (%eax)
-; GENERIC-NEXT: ficomps (%ecx)
-; GENERIC-NEXT: ficompl (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_ficom:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: ficoms (%ecx) # sched: [5:5.00]
-; ATOM-NEXT: ficoml (%eax) # sched: [5:5.00]
-; ATOM-NEXT: ficomps (%ecx) # sched: [5:5.00]
-; ATOM-NEXT: ficompl (%eax) # sched: [5:5.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_ficom:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: ficoms (%ecx) # sched: [6:1.00]
-; SLM-NEXT: ficoml (%eax) # sched: [6:1.00]
-; SLM-NEXT: ficomps (%ecx) # sched: [6:1.00]
-; SLM-NEXT: ficompl (%eax) # sched: [6:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_ficom:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: ficoms (%ecx) # sched: [11:2.00]
-; SANDY-NEXT: ficoml (%eax) # sched: [11:2.00]
-; SANDY-NEXT: ficomps (%ecx) # sched: [11:2.00]
-; SANDY-NEXT: ficompl (%eax) # sched: [11:2.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_ficom:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: ficoms (%ecx) # sched: [11:2.00]
-; HASWELL-NEXT: ficoml (%eax) # sched: [11:2.00]
-; HASWELL-NEXT: ficomps (%ecx) # sched: [11:2.00]
-; HASWELL-NEXT: ficompl (%eax) # sched: [11:2.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ficom:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: ficoms (%ecx) # sched: [10:2.00]
-; BROADWELL-NEXT: ficoml (%eax) # sched: [10:2.00]
-; BROADWELL-NEXT: ficomps (%ecx) # sched: [10:2.00]
-; BROADWELL-NEXT: ficompl (%eax) # sched: [10:2.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_ficom:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: ficoms (%ecx) # sched: [11:2.00]
-; SKYLAKE-NEXT: ficoml (%eax) # sched: [11:2.00]
-; SKYLAKE-NEXT: ficomps (%ecx) # sched: [11:2.00]
-; SKYLAKE-NEXT: ficompl (%eax) # sched: [11:2.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_ficom:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: ficoms (%ecx) # sched: [11:2.00]
-; SKX-NEXT: ficoml (%eax) # sched: [11:2.00]
-; SKX-NEXT: ficomps (%ecx) # sched: [11:2.00]
-; SKX-NEXT: ficompl (%eax) # sched: [11:2.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_ficom:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: ficoms (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: ficoml (%eax) # sched: [6:1.00]
-; BDVER2-NEXT: ficomps (%ecx) # sched: [6:1.00]
-; BDVER2-NEXT: ficompl (%eax) # sched: [6:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_ficom:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: ficoms (%ecx) # sched: [8:1.00]
-; BTVER2-NEXT: ficoml (%eax) # sched: [8:1.00]
-; BTVER2-NEXT: ficomps (%ecx) # sched: [8:1.00]
-; BTVER2-NEXT: ficompl (%eax) # sched: [8:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ficom:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: ficoms (%ecx) # sched: [12:1.50]
-; ZNVER1-NEXT: ficoml (%eax) # sched: [12:1.50]
-; ZNVER1-NEXT: ficomps (%ecx) # sched: [12:1.50]
-; ZNVER1-NEXT: ficompl (%eax) # sched: [12:1.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "ficoms $0 \0A\09 ficoml $1 \0A\09 ficomps $0 \0A\09 ficompl $1", "*m,*m"(i16 *%a0, i32 *%a1) nounwind
- ret void
-}
-
-define void @test_fild(i16 *%a0, i32 *%a1, i64 *%a2) optsize {
-; GENERIC-LABEL: test_fild:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: filds (%edx)
-; GENERIC-NEXT: fildl (%ecx)
-; GENERIC-NEXT: fildll (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fild:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: filds (%edx) # sched: [5:5.00]
-; ATOM-NEXT: fildl (%ecx) # sched: [5:5.00]
-; ATOM-NEXT: fildll (%eax) # sched: [5:5.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fild:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: filds (%edx) # sched: [3:1.00]
-; SLM-NEXT: fildl (%ecx) # sched: [3:1.00]
-; SLM-NEXT: fildll (%eax) # sched: [3:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fild:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: filds (%edx) # sched: [10:1.00]
-; SANDY-NEXT: fildl (%ecx) # sched: [10:1.00]
-; SANDY-NEXT: fildll (%eax) # sched: [10:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fild:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: filds (%edx) # sched: [10:1.00]
-; HASWELL-NEXT: fildl (%ecx) # sched: [10:1.00]
-; HASWELL-NEXT: fildll (%eax) # sched: [10:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fild:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: filds (%edx) # sched: [9:1.00]
-; BROADWELL-NEXT: fildl (%ecx) # sched: [9:1.00]
-; BROADWELL-NEXT: fildll (%eax) # sched: [9:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fild:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: filds (%edx) # sched: [10:1.00]
-; SKYLAKE-NEXT: fildl (%ecx) # sched: [10:1.00]
-; SKYLAKE-NEXT: fildll (%eax) # sched: [10:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fild:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: filds (%edx) # sched: [10:1.00]
-; SKX-NEXT: fildl (%ecx) # sched: [10:1.00]
-; SKX-NEXT: fildll (%eax) # sched: [10:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fild:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: filds (%edx) # sched: [5:0.50]
-; BDVER2-NEXT: fildl (%ecx) # sched: [5:0.50]
-; BDVER2-NEXT: fildll (%eax) # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fild:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: filds (%edx) # sched: [3:1.00]
-; BTVER2-NEXT: fildl (%ecx) # sched: [3:1.00]
-; BTVER2-NEXT: fildll (%eax) # sched: [3:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fild:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: filds (%edx) # sched: [11:1.00]
-; ZNVER1-NEXT: fildl (%ecx) # sched: [11:1.00]
-; ZNVER1-NEXT: fildll (%eax) # sched: [11:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "filds $0 \0A\09 fildl $1 \0A\09 fildll $2", "*m,*m,*m"(i16 *%a0, i32 *%a1, i64 *%a2) nounwind
- ret void
-}
-
-define void @test_fincstp() optsize {
-; GENERIC-LABEL: test_fincstp:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fincstp
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fincstp:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fincstp # sched: [1:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fincstp:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fincstp # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fincstp:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fincstp # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fincstp:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fincstp # sched: [1:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fincstp:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fincstp # sched: [1:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fincstp:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fincstp # sched: [1:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fincstp:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fincstp # sched: [1:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fincstp:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fincstp # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fincstp:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fincstp # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fincstp:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fincstp # sched: [11:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fincstp", ""() nounwind
- ret void
-}
-
-define void @test_finit() optsize {
-; GENERIC-LABEL: test_finit:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: wait
-; GENERIC-NEXT: fninit
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_finit:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: wait # sched: [1:0.50]
-; ATOM-NEXT: fninit # sched: [63:31.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_finit:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: wait # sched: [100:1.00]
-; SLM-NEXT: fninit # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_finit:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: wait # sched: [100:0.33]
-; SANDY-NEXT: fninit # sched: [5:1.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_finit:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: wait # sched: [2:0.50]
-; HASWELL-NEXT: fninit # sched: [75:6.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_finit:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: wait # sched: [2:0.50]
-; BROADWELL-NEXT: fninit # sched: [75:6.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_finit:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: wait # sched: [2:0.50]
-; SKYLAKE-NEXT: fninit # sched: [75:6.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_finit:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: wait # sched: [2:0.50]
-; SKX-NEXT: fninit # sched: [75:6.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_finit:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: wait # sched: [100:0.50]
-; BDVER2-NEXT: fninit # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_finit:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: wait # sched: [100:0.50]
-; BTVER2-NEXT: fninit # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_finit:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: wait # sched: [1:1.00]
-; ZNVER1-NEXT: fninit # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "finit", ""() nounwind
- ret void
-}
-
-define void @test_fninit() optsize {
-; GENERIC-LABEL: test_fninit:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fninit
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fninit:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fninit # sched: [63:31.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fninit:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fninit # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fninit:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fninit # sched: [5:1.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fninit:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fninit # sched: [75:6.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fninit:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fninit # sched: [75:6.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fninit:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fninit # sched: [75:6.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fninit:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fninit # sched: [75:6.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fninit:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fninit # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fninit:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fninit # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fninit:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fninit # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fninit", ""() nounwind
- ret void
-}
-
-define void @test_fist_fistp_fisttp(i16* %a0, i32* %a1, i64 *%a2) optsize {
-; GENERIC-LABEL: test_fist_fistp_fisttp:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fists (%edx)
-; GENERIC-NEXT: fistl (%ecx)
-; GENERIC-NEXT: fistps (%edx)
-; GENERIC-NEXT: fistpl (%ecx)
-; GENERIC-NEXT: fistpll (%eax)
-; GENERIC-NEXT: fisttps (%edx)
-; GENERIC-NEXT: fisttpl (%ecx)
-; GENERIC-NEXT: fisttpll (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fist_fistp_fisttp:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fists (%edx) # sched: [6:3.00]
-; ATOM-NEXT: fistl (%ecx) # sched: [6:3.00]
-; ATOM-NEXT: fistps (%edx) # sched: [6:3.00]
-; ATOM-NEXT: fistpl (%ecx) # sched: [6:3.00]
-; ATOM-NEXT: fistpll (%eax) # sched: [6:3.00]
-; ATOM-NEXT: fisttps (%edx) # sched: [2:1.00]
-; ATOM-NEXT: fisttpl (%ecx) # sched: [2:1.00]
-; ATOM-NEXT: fisttpll (%eax) # sched: [2:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fist_fistp_fisttp:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fists (%edx) # sched: [1:1.00]
-; SLM-NEXT: fistl (%ecx) # sched: [1:1.00]
-; SLM-NEXT: fistps (%edx) # sched: [1:1.00]
-; SLM-NEXT: fistpl (%ecx) # sched: [1:1.00]
-; SLM-NEXT: fistpll (%eax) # sched: [1:1.00]
-; SLM-NEXT: fisttps (%edx) # sched: [1:1.00]
-; SLM-NEXT: fisttpl (%ecx) # sched: [1:1.00]
-; SLM-NEXT: fisttpll (%eax) # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fist_fistp_fisttp:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fists (%edx) # sched: [9:1.00]
-; SANDY-NEXT: fistl (%ecx) # sched: [9:1.00]
-; SANDY-NEXT: fistps (%edx) # sched: [9:1.00]
-; SANDY-NEXT: fistpl (%ecx) # sched: [9:1.00]
-; SANDY-NEXT: fistpll (%eax) # sched: [9:1.00]
-; SANDY-NEXT: fisttps (%edx) # sched: [5:1.00]
-; SANDY-NEXT: fisttpl (%ecx) # sched: [5:1.00]
-; SANDY-NEXT: fisttpll (%eax) # sched: [5:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fist_fistp_fisttp:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fists (%edx) # sched: [4:1.00]
-; HASWELL-NEXT: fistl (%ecx) # sched: [4:1.00]
-; HASWELL-NEXT: fistps (%edx) # sched: [4:1.00]
-; HASWELL-NEXT: fistpl (%ecx) # sched: [4:1.00]
-; HASWELL-NEXT: fistpll (%eax) # sched: [4:1.00]
-; HASWELL-NEXT: fisttps (%edx) # sched: [4:1.00]
-; HASWELL-NEXT: fisttpl (%ecx) # sched: [4:1.00]
-; HASWELL-NEXT: fisttpll (%eax) # sched: [4:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fist_fistp_fisttp:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fists (%edx) # sched: [4:1.00]
-; BROADWELL-NEXT: fistl (%ecx) # sched: [4:1.00]
-; BROADWELL-NEXT: fistps (%edx) # sched: [4:1.00]
-; BROADWELL-NEXT: fistpl (%ecx) # sched: [4:1.00]
-; BROADWELL-NEXT: fistpll (%eax) # sched: [4:1.00]
-; BROADWELL-NEXT: fisttps (%edx) # sched: [4:1.00]
-; BROADWELL-NEXT: fisttpl (%ecx) # sched: [4:1.00]
-; BROADWELL-NEXT: fisttpll (%eax) # sched: [4:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fist_fistp_fisttp:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fists (%edx) # sched: [4:1.00]
-; SKYLAKE-NEXT: fistl (%ecx) # sched: [4:1.00]
-; SKYLAKE-NEXT: fistps (%edx) # sched: [4:1.00]
-; SKYLAKE-NEXT: fistpl (%ecx) # sched: [4:1.00]
-; SKYLAKE-NEXT: fistpll (%eax) # sched: [4:1.00]
-; SKYLAKE-NEXT: fisttps (%edx) # sched: [4:1.00]
-; SKYLAKE-NEXT: fisttpl (%ecx) # sched: [4:1.00]
-; SKYLAKE-NEXT: fisttpll (%eax) # sched: [4:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fist_fistp_fisttp:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fists (%edx) # sched: [4:1.00]
-; SKX-NEXT: fistl (%ecx) # sched: [4:1.00]
-; SKX-NEXT: fistps (%edx) # sched: [4:1.00]
-; SKX-NEXT: fistpl (%ecx) # sched: [4:1.00]
-; SKX-NEXT: fistpll (%eax) # sched: [4:1.00]
-; SKX-NEXT: fisttps (%edx) # sched: [4:1.00]
-; SKX-NEXT: fisttpl (%ecx) # sched: [4:1.00]
-; SKX-NEXT: fisttpll (%eax) # sched: [4:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fist_fistp_fisttp:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fists (%edx) # sched: [1:1.00]
-; BDVER2-NEXT: fistl (%ecx) # sched: [1:1.00]
-; BDVER2-NEXT: fistps (%edx) # sched: [1:1.00]
-; BDVER2-NEXT: fistpl (%ecx) # sched: [1:1.00]
-; BDVER2-NEXT: fistpll (%eax) # sched: [1:1.00]
-; BDVER2-NEXT: fisttps (%edx) # sched: [1:1.00]
-; BDVER2-NEXT: fisttpl (%ecx) # sched: [1:1.00]
-; BDVER2-NEXT: fisttpll (%eax) # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fist_fistp_fisttp:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fists (%edx) # sched: [1:1.00]
-; BTVER2-NEXT: fistl (%ecx) # sched: [1:1.00]
-; BTVER2-NEXT: fistps (%edx) # sched: [1:1.00]
-; BTVER2-NEXT: fistpl (%ecx) # sched: [1:1.00]
-; BTVER2-NEXT: fistpll (%eax) # sched: [1:1.00]
-; BTVER2-NEXT: fisttps (%edx) # sched: [1:1.00]
-; BTVER2-NEXT: fisttpl (%ecx) # sched: [1:1.00]
-; BTVER2-NEXT: fisttpll (%eax) # sched: [1:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fist_fistp_fisttp:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fists (%edx) # sched: [12:0.50]
-; ZNVER1-NEXT: fistl (%ecx) # sched: [12:0.50]
-; ZNVER1-NEXT: fistps (%edx) # sched: [12:0.50]
-; ZNVER1-NEXT: fistpl (%ecx) # sched: [12:0.50]
-; ZNVER1-NEXT: fistpll (%eax) # sched: [12:0.50]
-; ZNVER1-NEXT: fisttps (%edx) # sched: [12:0.50]
-; ZNVER1-NEXT: fisttpl (%ecx) # sched: [12:0.50]
-; ZNVER1-NEXT: fisttpll (%eax) # sched: [12:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fists $0 \0A\09 fistl $1 \0A\09 fistps $0 \0A\09 fistpl $1 \0A\09 fistpll $2 \0A\09 fisttps $0 \0A\09 fisttpl $1 \0A\09 fisttpll $2", "*m,*m,*m"(i16* %a0, i32* %a1, i64 *%a2) nounwind
- ret void
-}
-
-define void @test_fld(i16* %a0, i32* %a1, i64 *%a2) optsize {
-; GENERIC-LABEL: test_fld:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fld %st(0)
-; GENERIC-NEXT: flds (%edx)
-; GENERIC-NEXT: fldl (%ecx)
-; GENERIC-NEXT: fldt (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fld:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fld %st(0) # sched: [1:1.00]
-; ATOM-NEXT: flds (%edx) # sched: [1:1.00]
-; ATOM-NEXT: fldl (%ecx) # sched: [1:1.00]
-; ATOM-NEXT: fldt (%eax) # sched: [4:2.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fld:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fld %st(0) # sched: [1:0.50]
-; SLM-NEXT: flds (%edx) # sched: [3:1.00]
-; SLM-NEXT: fldl (%ecx) # sched: [3:1.00]
-; SLM-NEXT: fldt (%eax) # sched: [3:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fld:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fld %st(0) # sched: [1:1.00]
-; SANDY-NEXT: flds (%edx) # sched: [9:1.00]
-; SANDY-NEXT: fldl (%ecx) # sched: [9:1.00]
-; SANDY-NEXT: fldt (%eax) # sched: [9:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fld:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fld %st(0) # sched: [1:0.50]
-; HASWELL-NEXT: flds (%edx) # sched: [7:0.50]
-; HASWELL-NEXT: fldl (%ecx) # sched: [7:0.50]
-; HASWELL-NEXT: fldt (%eax) # sched: [7:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fld:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fld %st(0) # sched: [1:0.25]
-; BROADWELL-NEXT: flds (%edx) # sched: [6:0.50]
-; BROADWELL-NEXT: fldl (%ecx) # sched: [6:0.50]
-; BROADWELL-NEXT: fldt (%eax) # sched: [6:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fld:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fld %st(0) # sched: [1:0.25]
-; SKYLAKE-NEXT: flds (%edx) # sched: [7:0.50]
-; SKYLAKE-NEXT: fldl (%ecx) # sched: [7:0.50]
-; SKYLAKE-NEXT: fldt (%eax) # sched: [7:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fld:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fld %st(0) # sched: [1:0.25]
-; SKX-NEXT: flds (%edx) # sched: [7:0.50]
-; SKX-NEXT: fldl (%ecx) # sched: [7:0.50]
-; SKX-NEXT: fldt (%eax) # sched: [7:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fld:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fld %st(0) # sched: [1:0.50]
-; BDVER2-NEXT: flds (%edx) # sched: [5:0.50]
-; BDVER2-NEXT: fldl (%ecx) # sched: [5:0.50]
-; BDVER2-NEXT: fldt (%eax) # sched: [5:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fld:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fld %st(0) # sched: [1:0.50]
-; BTVER2-NEXT: flds (%edx) # sched: [3:1.00]
-; BTVER2-NEXT: fldl (%ecx) # sched: [3:1.00]
-; BTVER2-NEXT: fldt (%eax) # sched: [3:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fld:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fld %st(0) # sched: [1:0.50]
-; ZNVER1-NEXT: flds (%edx) # sched: [8:0.50]
-; ZNVER1-NEXT: fldl (%ecx) # sched: [8:0.50]
-; ZNVER1-NEXT: fldt (%eax) # sched: [1:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fld %st(0) \0A\09 flds $0 \0A\09 fldl $1 \0A\09 fldt $2", "*m,*m,*m"(i16* %a0, i32* %a1, i64 *%a2) nounwind
- ret void
-}
-
-define void @test_fldcw_fldenv(i8* %a0) optsize {
-; GENERIC-LABEL: test_fldcw_fldenv:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fldcw (%eax)
-; GENERIC-NEXT: fldenv (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fldcw_fldenv:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fldcw (%eax) # sched: [5:2.50]
-; ATOM-NEXT: fldenv (%eax) # sched: [100:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fldcw_fldenv:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fldcw (%eax) # sched: [3:1.00]
-; SLM-NEXT: fldenv (%eax) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fldcw_fldenv:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fldcw (%eax) # sched: [8:2.00]
-; SANDY-NEXT: fldenv (%eax) # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fldcw_fldenv:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fldcw (%eax) # sched: [7:1.00]
-; HASWELL-NEXT: fldenv (%eax) # sched: [61:14.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fldcw_fldenv:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fldcw (%eax) # sched: [7:1.00]
-; BROADWELL-NEXT: fldenv (%eax) # sched: [60:14.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fldcw_fldenv:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fldcw (%eax) # sched: [7:1.00]
-; SKYLAKE-NEXT: fldenv (%eax) # sched: [62:14.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fldcw_fldenv:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fldcw (%eax) # sched: [7:1.00]
-; SKX-NEXT: fldenv (%eax) # sched: [62:14.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fldcw_fldenv:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fldcw (%eax) # sched: [5:0.50]
-; BDVER2-NEXT: fldenv (%eax) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fldcw_fldenv:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fldcw (%eax) # sched: [3:1.00]
-; BTVER2-NEXT: fldenv (%eax) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fldcw_fldenv:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fldcw (%eax) # sched: [100:0.25]
-; ZNVER1-NEXT: fldenv (%eax) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fldcw $0 \0A\09 fldenv $0", "*m"(i8* %a0) nounwind
- ret void
-}
-
-define void @test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz() optsize {
-; GENERIC-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fld1
-; GENERIC-NEXT: fldl2e
-; GENERIC-NEXT: fldl2t
-; GENERIC-NEXT: fldlg2
-; GENERIC-NEXT: fldln2
-; GENERIC-NEXT: fldpi
-; GENERIC-NEXT: fldz
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fld1 # sched: [6:3.00]
-; ATOM-NEXT: fldl2e # sched: [10:5.00]
-; ATOM-NEXT: fldl2t # sched: [10:5.00]
-; ATOM-NEXT: fldlg2 # sched: [10:5.00]
-; ATOM-NEXT: fldln2 # sched: [10:5.00]
-; ATOM-NEXT: fldpi # sched: [10:5.00]
-; ATOM-NEXT: fldz # sched: [1:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fld1 # sched: [1:0.50]
-; SLM-NEXT: fldl2e # sched: [1:1.00]
-; SLM-NEXT: fldl2t # sched: [1:1.00]
-; SLM-NEXT: fldlg2 # sched: [1:1.00]
-; SLM-NEXT: fldln2 # sched: [1:1.00]
-; SLM-NEXT: fldpi # sched: [1:1.00]
-; SLM-NEXT: fldz # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fld1 # sched: [1:1.00]
-; SANDY-NEXT: fldl2e # sched: [1:1.00]
-; SANDY-NEXT: fldl2t # sched: [1:1.00]
-; SANDY-NEXT: fldlg2 # sched: [1:1.00]
-; SANDY-NEXT: fldln2 # sched: [1:1.00]
-; SANDY-NEXT: fldpi # sched: [1:1.00]
-; SANDY-NEXT: fldz # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fld1 # sched: [1:1.00]
-; HASWELL-NEXT: fldl2e # sched: [1:1.00]
-; HASWELL-NEXT: fldl2t # sched: [1:1.00]
-; HASWELL-NEXT: fldlg2 # sched: [1:1.00]
-; HASWELL-NEXT: fldln2 # sched: [1:1.00]
-; HASWELL-NEXT: fldpi # sched: [1:1.00]
-; HASWELL-NEXT: fldz # sched: [1:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fld1 # sched: [1:1.00]
-; BROADWELL-NEXT: fldl2e # sched: [1:1.00]
-; BROADWELL-NEXT: fldl2t # sched: [1:1.00]
-; BROADWELL-NEXT: fldlg2 # sched: [1:1.00]
-; BROADWELL-NEXT: fldln2 # sched: [1:1.00]
-; BROADWELL-NEXT: fldpi # sched: [1:1.00]
-; BROADWELL-NEXT: fldz # sched: [1:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fld1 # sched: [1:1.00]
-; SKYLAKE-NEXT: fldl2e # sched: [1:1.00]
-; SKYLAKE-NEXT: fldl2t # sched: [1:1.00]
-; SKYLAKE-NEXT: fldlg2 # sched: [1:1.00]
-; SKYLAKE-NEXT: fldln2 # sched: [1:1.00]
-; SKYLAKE-NEXT: fldpi # sched: [1:1.00]
-; SKYLAKE-NEXT: fldz # sched: [1:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fld1 # sched: [1:1.00]
-; SKX-NEXT: fldl2e # sched: [1:1.00]
-; SKX-NEXT: fldl2t # sched: [1:1.00]
-; SKX-NEXT: fldlg2 # sched: [1:1.00]
-; SKX-NEXT: fldln2 # sched: [1:1.00]
-; SKX-NEXT: fldpi # sched: [1:1.00]
-; SKX-NEXT: fldz # sched: [1:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fld1 # sched: [3:1.00]
-; BDVER2-NEXT: fldl2e # sched: [3:1.00]
-; BDVER2-NEXT: fldl2t # sched: [3:1.00]
-; BDVER2-NEXT: fldlg2 # sched: [3:1.00]
-; BDVER2-NEXT: fldln2 # sched: [3:1.00]
-; BDVER2-NEXT: fldpi # sched: [3:1.00]
-; BDVER2-NEXT: fldz # sched: [3:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fld1 # sched: [3:1.00]
-; BTVER2-NEXT: fldl2e # sched: [3:1.00]
-; BTVER2-NEXT: fldl2t # sched: [3:1.00]
-; BTVER2-NEXT: fldlg2 # sched: [3:1.00]
-; BTVER2-NEXT: fldln2 # sched: [3:1.00]
-; BTVER2-NEXT: fldpi # sched: [3:1.00]
-; BTVER2-NEXT: fldz # sched: [3:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fld1 # sched: [11:1.00]
-; ZNVER1-NEXT: fldl2e # sched: [11:1.00]
-; ZNVER1-NEXT: fldl2t # sched: [11:1.00]
-; ZNVER1-NEXT: fldlg2 # sched: [11:1.00]
-; ZNVER1-NEXT: fldln2 # sched: [11:1.00]
-; ZNVER1-NEXT: fldpi # sched: [11:1.00]
-; ZNVER1-NEXT: fldz # sched: [8:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fld1 \0A\09 fldl2e \0A\09 fldl2t \0A\09 fldlg2 \0A\09 fldln2 \0A\09 fldpi \0A\09 fldz", ""() nounwind
- ret void
-}
-
-define void @test_fmul(float *%a0, double *%a1) optsize {
-; GENERIC-LABEL: test_fmul:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fmul %st, %st(1)
-; GENERIC-NEXT: fmul %st(2)
-; GENERIC-NEXT: fmuls (%ecx)
-; GENERIC-NEXT: fmull (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fmul:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fmul %st, %st(1) # sched: [4:4.00]
-; ATOM-NEXT: fmul %st(2) # sched: [4:4.00]
-; ATOM-NEXT: fmuls (%ecx) # sched: [4:4.00]
-; ATOM-NEXT: fmull (%eax) # sched: [4:4.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fmul:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fmul %st, %st(1) # sched: [5:2.00]
-; SLM-NEXT: fmul %st(2) # sched: [5:2.00]
-; SLM-NEXT: fmuls (%ecx) # sched: [8:2.00]
-; SLM-NEXT: fmull (%eax) # sched: [8:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fmul:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fmul %st, %st(1) # sched: [5:1.00]
-; SANDY-NEXT: fmul %st(2) # sched: [5:1.00]
-; SANDY-NEXT: fmuls (%ecx) # sched: [12:1.00]
-; SANDY-NEXT: fmull (%eax) # sched: [12:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fmul:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fmul %st, %st(1) # sched: [5:1.00]
-; HASWELL-NEXT: fmul %st(2) # sched: [5:1.00]
-; HASWELL-NEXT: fmuls (%ecx) # sched: [12:1.00]
-; HASWELL-NEXT: fmull (%eax) # sched: [12:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fmul:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fmul %st, %st(1) # sched: [5:1.00]
-; BROADWELL-NEXT: fmul %st(2) # sched: [5:1.00]
-; BROADWELL-NEXT: fmuls (%ecx) # sched: [11:1.00]
-; BROADWELL-NEXT: fmull (%eax) # sched: [11:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fmul:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fmul %st, %st(1) # sched: [4:1.00]
-; SKYLAKE-NEXT: fmul %st(2) # sched: [4:1.00]
-; SKYLAKE-NEXT: fmuls (%ecx) # sched: [11:1.00]
-; SKYLAKE-NEXT: fmull (%eax) # sched: [11:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fmul:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fmul %st, %st(1) # sched: [4:1.00]
-; SKX-NEXT: fmul %st(2) # sched: [4:1.00]
-; SKX-NEXT: fmuls (%ecx) # sched: [11:1.00]
-; SKX-NEXT: fmull (%eax) # sched: [11:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fmul:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fmul %st, %st(1) # sched: [5:1.00]
-; BDVER2-NEXT: fmul %st(2) # sched: [5:1.00]
-; BDVER2-NEXT: fmuls (%ecx) # sched: [10:1.00]
-; BDVER2-NEXT: fmull (%eax) # sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fmul:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fmul %st, %st(1) # sched: [2:1.00]
-; BTVER2-NEXT: fmul %st(2) # sched: [2:1.00]
-; BTVER2-NEXT: fmuls (%ecx) # sched: [7:1.00]
-; BTVER2-NEXT: fmull (%eax) # sched: [7:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fmul:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fmul %st, %st(1) # sched: [3:0.50]
-; ZNVER1-NEXT: fmul %st(2) # sched: [3:0.50]
-; ZNVER1-NEXT: fmuls (%ecx) # sched: [10:0.50]
-; ZNVER1-NEXT: fmull (%eax) # sched: [10:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fmul %st(0), %st(1) \0A\09 fmul %st(2), %st(0) \0A\09 fmuls $0 \0A\09 fmull $1", "*m,*m"(float *%a0, double *%a1) nounwind
- ret void
-}
-
-define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize {
-; GENERIC-LABEL: test_fmulp_fimul:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fmulp %st(1)
-; GENERIC-NEXT: fmulp %st(2)
-; GENERIC-NEXT: fimuls (%ecx)
-; GENERIC-NEXT: fimull (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fmulp_fimul:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fmulp %st(1) # sched: [4:4.00]
-; ATOM-NEXT: fmulp %st(2) # sched: [4:4.00]
-; ATOM-NEXT: fimuls (%ecx) # sched: [4:4.00]
-; ATOM-NEXT: fimull (%eax) # sched: [4:4.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fmulp_fimul:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fmulp %st(1) # sched: [5:2.00]
-; SLM-NEXT: fmulp %st(2) # sched: [5:2.00]
-; SLM-NEXT: fimuls (%ecx) # sched: [8:2.00]
-; SLM-NEXT: fimull (%eax) # sched: [8:2.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fmulp_fimul:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fmulp %st(1) # sched: [5:1.00]
-; SANDY-NEXT: fmulp %st(2) # sched: [5:1.00]
-; SANDY-NEXT: fimuls (%ecx) # sched: [15:1.00]
-; SANDY-NEXT: fimull (%eax) # sched: [15:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fmulp_fimul:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fmulp %st(1) # sched: [5:1.00]
-; HASWELL-NEXT: fmulp %st(2) # sched: [5:1.00]
-; HASWELL-NEXT: fimuls (%ecx) # sched: [15:1.00]
-; HASWELL-NEXT: fimull (%eax) # sched: [15:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fmulp_fimul:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fmulp %st(1) # sched: [5:1.00]
-; BROADWELL-NEXT: fmulp %st(2) # sched: [5:1.00]
-; BROADWELL-NEXT: fimuls (%ecx) # sched: [14:1.00]
-; BROADWELL-NEXT: fimull (%eax) # sched: [14:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fmulp_fimul:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fmulp %st(1) # sched: [4:1.00]
-; SKYLAKE-NEXT: fmulp %st(2) # sched: [4:1.00]
-; SKYLAKE-NEXT: fimuls (%ecx) # sched: [14:1.00]
-; SKYLAKE-NEXT: fimull (%eax) # sched: [14:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fmulp_fimul:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fmulp %st(1) # sched: [4:1.00]
-; SKX-NEXT: fmulp %st(2) # sched: [4:1.00]
-; SKX-NEXT: fimuls (%ecx) # sched: [14:1.00]
-; SKX-NEXT: fimull (%eax) # sched: [14:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fmulp_fimul:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fmulp %st(1) # sched: [5:1.00]
-; BDVER2-NEXT: fmulp %st(2) # sched: [5:1.00]
-; BDVER2-NEXT: fimuls (%ecx) # sched: [10:1.00]
-; BDVER2-NEXT: fimull (%eax) # sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fmulp_fimul:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fmulp %st(1) # sched: [2:1.00]
-; BTVER2-NEXT: fmulp %st(2) # sched: [2:1.00]
-; BTVER2-NEXT: fimuls (%ecx) # sched: [7:1.00]
-; BTVER2-NEXT: fimull (%eax) # sched: [7:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fmulp_fimul:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fmulp %st(1) # sched: [3:0.50]
-; ZNVER1-NEXT: fmulp %st(2) # sched: [3:0.50]
-; ZNVER1-NEXT: fimuls (%ecx) # sched: [10:0.50]
-; ZNVER1-NEXT: fimull (%eax) # sched: [10:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fmulp \0A\09 fmulp %st(2), %st(0) \0A\09 fimuls $0 \0A\09 fimull $1", "*m,*m"(i16 *%a0, i32 *%a1) nounwind
- ret void
-}
-
-define void @test_fnop() optsize {
-; GENERIC-LABEL: test_fnop:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fnop
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fnop:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fnop # sched: [1:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fnop:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fnop # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fnop:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fnop # sched: [1:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fnop:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fnop # sched: [1:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fnop:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fnop # sched: [1:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fnop:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fnop # sched: [1:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fnop:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fnop # sched: [1:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fnop:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fnop # sched: [1:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fnop:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fnop # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fnop:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fnop # sched: [1:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fnop", ""() nounwind
- ret void
-}
-
-define void @test_fpatan() optsize {
-; GENERIC-LABEL: test_fpatan:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fpatan
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fpatan:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fpatan # sched: [183:91.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fpatan:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fpatan # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fpatan:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fpatan # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fpatan:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fpatan # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fpatan:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fpatan # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fpatan:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fpatan # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fpatan:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fpatan # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fpatan:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fpatan # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fpatan:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fpatan # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fpatan:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fpatan # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fpatan", ""() nounwind
- ret void
-}
-
-define void @test_fprem_fprem1() optsize {
-; GENERIC-LABEL: test_fprem_fprem1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fprem
-; GENERIC-NEXT: fprem1
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fprem_fprem1:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fprem # sched: [55:27.50]
-; ATOM-NEXT: fprem1 # sched: [71:35.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fprem_fprem1:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fprem # sched: [100:1.00]
-; SLM-NEXT: fprem1 # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fprem_fprem1:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fprem # sched: [100:0.33]
-; SANDY-NEXT: fprem1 # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fprem_fprem1:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fprem # sched: [19:7.00]
-; HASWELL-NEXT: fprem1 # sched: [27:10.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fprem_fprem1:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fprem # sched: [100:0.25]
-; BROADWELL-NEXT: fprem1 # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fprem_fprem1:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fprem # sched: [100:0.25]
-; SKYLAKE-NEXT: fprem1 # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fprem_fprem1:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fprem # sched: [100:0.25]
-; SKX-NEXT: fprem1 # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fprem_fprem1:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fprem # sched: [100:0.50]
-; BDVER2-NEXT: fprem1 # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fprem_fprem1:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fprem # sched: [100:0.50]
-; BTVER2-NEXT: fprem1 # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fprem_fprem1:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fprem # sched: [100:0.25]
-; ZNVER1-NEXT: fprem1 # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fprem \0A\09 fprem1", ""() nounwind
- ret void
-}
-
-define void @test_fptan() optsize {
-; GENERIC-LABEL: test_fptan:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fptan
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fptan:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fptan # sched: [168:84.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fptan:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fptan # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fptan:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fptan # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fptan:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fptan # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fptan:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fptan # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fptan:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fptan # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fptan:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fptan # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fptan:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fptan # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fptan:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fptan # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fptan:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fptan # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fptan", ""() nounwind
- ret void
-}
-
-define void @test_frndint() optsize {
-; GENERIC-LABEL: test_frndint:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: frndint
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_frndint:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: frndint # sched: [46:23.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_frndint:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: frndint # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_frndint:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: frndint # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_frndint:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: frndint # sched: [11:4.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_frndint:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: frndint # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_frndint:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: frndint # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_frndint:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: frndint # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_frndint:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: frndint # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_frndint:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: frndint # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_frndint:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: frndint # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "frndint", ""() nounwind
- ret void
-}
-
-define void @test_frstor(i8* %a0) optsize {
-; GENERIC-LABEL: test_frstor:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: frstor (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_frstor:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: frstor (%eax) # sched: [100:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_frstor:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: frstor (%eax) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_frstor:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: frstor (%eax) # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_frstor:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: frstor (%eax) # sched: [1:22.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_frstor:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: frstor (%eax) # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_frstor:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: frstor (%eax) # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_frstor:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: frstor (%eax) # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_frstor:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: frstor (%eax) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_frstor:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: frstor (%eax) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_frstor:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: frstor (%eax) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "frstor $0", "*m"(i8* %a0) nounwind
- ret void
-}
-
-define void @test_fsave(i8* %a0) optsize {
-; GENERIC-LABEL: test_fsave:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: wait
-; GENERIC-NEXT: fnsave (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fsave:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: wait # sched: [1:0.50]
-; ATOM-NEXT: fnsave (%eax) # sched: [100:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fsave:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: wait # sched: [100:1.00]
-; SLM-NEXT: fnsave (%eax) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fsave:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: wait # sched: [100:0.33]
-; SANDY-NEXT: fnsave (%eax) # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fsave:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: wait # sched: [2:0.50]
-; HASWELL-NEXT: fnsave (%eax) # sched: [1:36.75]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fsave:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: wait # sched: [2:0.50]
-; BROADWELL-NEXT: fnsave (%eax) # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fsave:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: wait # sched: [2:0.50]
-; SKYLAKE-NEXT: fnsave (%eax) # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fsave:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: wait # sched: [2:0.50]
-; SKX-NEXT: fnsave (%eax) # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fsave:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: wait # sched: [100:0.50]
-; BDVER2-NEXT: fnsave (%eax) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fsave:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: wait # sched: [100:0.50]
-; BTVER2-NEXT: fnsave (%eax) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fsave:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: wait # sched: [1:1.00]
-; ZNVER1-NEXT: fnsave (%eax) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fsave $0", "*m"(i8* %a0) nounwind
- ret void
-}
-
-define void @test_fnsave(i8* %a0) optsize {
-; GENERIC-LABEL: test_fnsave:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fnsave (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fnsave:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fnsave (%eax) # sched: [100:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fnsave:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fnsave (%eax) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fnsave:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fnsave (%eax) # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fnsave:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fnsave (%eax) # sched: [1:36.75]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fnsave:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fnsave (%eax) # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fnsave:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fnsave (%eax) # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fnsave:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fnsave (%eax) # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fnsave:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fnsave (%eax) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fnsave:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fnsave (%eax) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fnsave:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fnsave (%eax) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fnsave $0", "*m"(i8* %a0) nounwind
- ret void
-}
-
-define void @test_fscale() optsize {
-; GENERIC-LABEL: test_fscale:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fscale
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fscale:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fscale # sched: [77:38.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fscale:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fscale # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fscale:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fscale # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fscale:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fscale # sched: [75:12.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fscale:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fscale # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fscale:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fscale # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fscale:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fscale # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fscale:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fscale # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fscale:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fscale # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fscale:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fscale # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fscale", ""() nounwind
- ret void
-}
-
-define void @test_fsin() optsize {
-; GENERIC-LABEL: test_fsin:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fsin
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fsin:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fsin # sched: [174:87.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fsin:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fsin # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fsin:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fsin # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fsin:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fsin # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fsin:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fsin # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fsin:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fsin # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fsin:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fsin # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fsin:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fsin # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fsin:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fsin # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fsin:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fsin # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fsin", ""() nounwind
- ret void
-}
-
-define void @test_fsincos() optsize {
-; GENERIC-LABEL: test_fsincos:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fsincos
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fsincos:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fsincos # sched: [174:87.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fsincos:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fsincos # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fsincos:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fsincos # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fsincos:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fsincos # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fsincos:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fsincos # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fsincos:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fsincos # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fsincos:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fsincos # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fsincos:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fsincos # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fsincos:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fsincos # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fsincos:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fsincos # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fsincos", ""() nounwind
- ret void
-}
-
-define void @test_fsqrt() optsize {
-; GENERIC-LABEL: test_fsqrt:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fsqrt
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fsqrt:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fsqrt # sched: [71:35.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fsqrt:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fsqrt # sched: [40:40.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fsqrt:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fsqrt # sched: [24:24.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fsqrt:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fsqrt # sched: [23:17.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fsqrt:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fsqrt # sched: [23:9.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fsqrt:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fsqrt # sched: [21:7.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fsqrt:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fsqrt # sched: [21:7.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fsqrt:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fsqrt # sched: [1:17.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fsqrt:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fsqrt # sched: [35:35.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fsqrt:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fsqrt # sched: [20:20.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fsqrt", ""() nounwind
- ret void
-}
-
-define void @test_fst_fstp(i16* %a0, i32* %a1, i64 *%a2) optsize {
-; GENERIC-LABEL: test_fst_fstp:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fst %st(0)
-; GENERIC-NEXT: fsts (%edx)
-; GENERIC-NEXT: fstl (%ecx)
-; GENERIC-NEXT: fstp %st(0)
-; GENERIC-NEXT: fstpl (%edx)
-; GENERIC-NEXT: fstpl (%ecx)
-; GENERIC-NEXT: fstpt (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fst_fstp:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fst %st(0) # sched: [2:1.00]
-; ATOM-NEXT: fsts (%edx) # sched: [2:1.00]
-; ATOM-NEXT: fstl (%ecx) # sched: [2:1.00]
-; ATOM-NEXT: fstp %st(0) # sched: [2:1.00]
-; ATOM-NEXT: fstpl (%edx) # sched: [2:1.00]
-; ATOM-NEXT: fstpl (%ecx) # sched: [2:1.00]
-; ATOM-NEXT: fstpt (%eax) # sched: [5:2.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fst_fstp:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fst %st(0) # sched: [1:0.50]
-; SLM-NEXT: fsts (%edx) # sched: [1:1.00]
-; SLM-NEXT: fstl (%ecx) # sched: [1:1.00]
-; SLM-NEXT: fstp %st(0) # sched: [1:0.50]
-; SLM-NEXT: fstpl (%edx) # sched: [1:1.00]
-; SLM-NEXT: fstpl (%ecx) # sched: [1:1.00]
-; SLM-NEXT: fstpt (%eax) # sched: [1:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fst_fstp:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fst %st(0) # sched: [1:1.00]
-; SANDY-NEXT: fsts (%edx) # sched: [6:1.00]
-; SANDY-NEXT: fstl (%ecx) # sched: [6:1.00]
-; SANDY-NEXT: fstp %st(0) # sched: [1:1.00]
-; SANDY-NEXT: fstpl (%edx) # sched: [6:1.00]
-; SANDY-NEXT: fstpl (%ecx) # sched: [6:1.00]
-; SANDY-NEXT: fstpt (%eax) # sched: [6:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fst_fstp:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fst %st(0) # sched: [1:0.50]
-; HASWELL-NEXT: fsts (%edx) # sched: [1:1.00]
-; HASWELL-NEXT: fstl (%ecx) # sched: [1:1.00]
-; HASWELL-NEXT: fstp %st(0) # sched: [1:0.50]
-; HASWELL-NEXT: fstpl (%edx) # sched: [1:1.00]
-; HASWELL-NEXT: fstpl (%ecx) # sched: [1:1.00]
-; HASWELL-NEXT: fstpt (%eax) # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fst_fstp:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fst %st(0) # sched: [1:0.25]
-; BROADWELL-NEXT: fsts (%edx) # sched: [1:1.00]
-; BROADWELL-NEXT: fstl (%ecx) # sched: [1:1.00]
-; BROADWELL-NEXT: fstp %st(0) # sched: [1:0.25]
-; BROADWELL-NEXT: fstpl (%edx) # sched: [1:1.00]
-; BROADWELL-NEXT: fstpl (%ecx) # sched: [1:1.00]
-; BROADWELL-NEXT: fstpt (%eax) # sched: [1:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fst_fstp:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fst %st(0) # sched: [1:0.25]
-; SKYLAKE-NEXT: fsts (%edx) # sched: [1:1.00]
-; SKYLAKE-NEXT: fstl (%ecx) # sched: [1:1.00]
-; SKYLAKE-NEXT: fstp %st(0) # sched: [1:0.25]
-; SKYLAKE-NEXT: fstpl (%edx) # sched: [1:1.00]
-; SKYLAKE-NEXT: fstpl (%ecx) # sched: [1:1.00]
-; SKYLAKE-NEXT: fstpt (%eax) # sched: [1:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fst_fstp:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fst %st(0) # sched: [1:0.25]
-; SKX-NEXT: fsts (%edx) # sched: [1:1.00]
-; SKX-NEXT: fstl (%ecx) # sched: [1:1.00]
-; SKX-NEXT: fstp %st(0) # sched: [1:0.25]
-; SKX-NEXT: fstpl (%edx) # sched: [1:1.00]
-; SKX-NEXT: fstpl (%ecx) # sched: [1:1.00]
-; SKX-NEXT: fstpt (%eax) # sched: [1:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fst_fstp:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fst %st(0) # sched: [1:0.50]
-; BDVER2-NEXT: fsts (%edx) # sched: [1:1.00]
-; BDVER2-NEXT: fstl (%ecx) # sched: [1:1.00]
-; BDVER2-NEXT: fstp %st(0) # sched: [1:0.50]
-; BDVER2-NEXT: fstpl (%edx) # sched: [1:1.00]
-; BDVER2-NEXT: fstpl (%ecx) # sched: [1:1.00]
-; BDVER2-NEXT: fstpt (%eax) # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fst_fstp:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fst %st(0) # sched: [1:0.50]
-; BTVER2-NEXT: fsts (%edx) # sched: [1:1.00]
-; BTVER2-NEXT: fstl (%ecx) # sched: [1:1.00]
-; BTVER2-NEXT: fstp %st(0) # sched: [1:0.50]
-; BTVER2-NEXT: fstpl (%edx) # sched: [1:1.00]
-; BTVER2-NEXT: fstpl (%ecx) # sched: [1:1.00]
-; BTVER2-NEXT: fstpt (%eax) # sched: [1:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fst_fstp:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fst %st(0) # sched: [5:0.50]
-; ZNVER1-NEXT: fsts (%edx) # sched: [1:0.50]
-; ZNVER1-NEXT: fstl (%ecx) # sched: [1:0.50]
-; ZNVER1-NEXT: fstp %st(0) # sched: [5:0.50]
-; ZNVER1-NEXT: fstpl (%edx) # sched: [1:0.50]
-; ZNVER1-NEXT: fstpl (%ecx) # sched: [1:0.50]
-; ZNVER1-NEXT: fstpt (%eax) # sched: [5:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fst %st(0) \0A\09 fsts $0 \0A\09 fstl $1 \0A\09 fstp %st(0) \0A\09 fstpl $0 \0A\09 fstpl $1 \0A\09 fstpt $2", "*m,*m,*m"(i16* %a0, i32* %a1, i64 *%a2) nounwind
- ret void
-}
-
-define void @test_fstcw_fstenv_fstsw(i8* %a0) optsize {
-; GENERIC-LABEL: test_fstcw_fstenv_fstsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: wait
-; GENERIC-NEXT: fnstcw (%eax)
-; GENERIC-NEXT: wait
-; GENERIC-NEXT: fnstenv (%eax)
-; GENERIC-NEXT: wait
-; GENERIC-NEXT: fnstsw (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fstcw_fstenv_fstsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: wait # sched: [1:0.50]
-; ATOM-NEXT: fnstcw (%eax) # sched: [8:4.00]
-; ATOM-NEXT: wait # sched: [1:0.50]
-; ATOM-NEXT: fnstenv (%eax) # sched: [100:0.50]
-; ATOM-NEXT: wait # sched: [1:0.50]
-; ATOM-NEXT: fnstsw (%eax) # sched: [100:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fstcw_fstenv_fstsw:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: wait # sched: [100:1.00]
-; SLM-NEXT: fnstcw (%eax) # sched: [1:0.50]
-; SLM-NEXT: wait # sched: [100:1.00]
-; SLM-NEXT: fnstenv (%eax) # sched: [100:1.00]
-; SLM-NEXT: wait # sched: [100:1.00]
-; SLM-NEXT: fnstsw (%eax) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fstcw_fstenv_fstsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: wait # sched: [100:0.33]
-; SANDY-NEXT: fnstcw (%eax) # sched: [7:1.00]
-; SANDY-NEXT: wait # sched: [100:0.33]
-; SANDY-NEXT: fnstenv (%eax) # sched: [100:0.33]
-; SANDY-NEXT: wait # sched: [100:0.33]
-; SANDY-NEXT: fnstsw (%eax) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fstcw_fstenv_fstsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: wait # sched: [2:0.50]
-; HASWELL-NEXT: fnstcw (%eax) # sched: [2:1.00]
-; HASWELL-NEXT: wait # sched: [2:0.50]
-; HASWELL-NEXT: fnstenv (%eax) # sched: [115:19.50]
-; HASWELL-NEXT: wait # sched: [2:0.50]
-; HASWELL-NEXT: fnstsw (%eax) # sched: [4:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fstcw_fstenv_fstsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: wait # sched: [2:0.50]
-; BROADWELL-NEXT: fnstcw (%eax) # sched: [2:1.00]
-; BROADWELL-NEXT: wait # sched: [2:0.50]
-; BROADWELL-NEXT: fnstenv (%eax) # sched: [115:19.50]
-; BROADWELL-NEXT: wait # sched: [2:0.50]
-; BROADWELL-NEXT: fnstsw (%eax) # sched: [4:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fstcw_fstenv_fstsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: wait # sched: [2:0.50]
-; SKYLAKE-NEXT: fnstcw (%eax) # sched: [2:1.00]
-; SKYLAKE-NEXT: wait # sched: [2:0.50]
-; SKYLAKE-NEXT: fnstenv (%eax) # sched: [106:19.50]
-; SKYLAKE-NEXT: wait # sched: [2:0.50]
-; SKYLAKE-NEXT: fnstsw (%eax) # sched: [3:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fstcw_fstenv_fstsw:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: wait # sched: [2:0.50]
-; SKX-NEXT: fnstcw (%eax) # sched: [2:1.00]
-; SKX-NEXT: wait # sched: [2:0.50]
-; SKX-NEXT: fnstenv (%eax) # sched: [106:19.50]
-; SKX-NEXT: wait # sched: [2:0.50]
-; SKX-NEXT: fnstsw (%eax) # sched: [3:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fstcw_fstenv_fstsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: wait # sched: [100:0.50]
-; BDVER2-NEXT: fnstcw (%eax) # sched: [1:0.50]
-; BDVER2-NEXT: wait # sched: [100:0.50]
-; BDVER2-NEXT: fnstenv (%eax) # sched: [100:0.50]
-; BDVER2-NEXT: wait # sched: [100:0.50]
-; BDVER2-NEXT: fnstsw (%eax) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fstcw_fstenv_fstsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: wait # sched: [100:0.50]
-; BTVER2-NEXT: fnstcw (%eax) # sched: [1:0.50]
-; BTVER2-NEXT: wait # sched: [100:0.50]
-; BTVER2-NEXT: fnstenv (%eax) # sched: [100:0.50]
-; BTVER2-NEXT: wait # sched: [100:0.50]
-; BTVER2-NEXT: fnstsw (%eax) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fstcw_fstenv_fstsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: wait # sched: [1:1.00]
-; ZNVER1-NEXT: fnstcw (%eax) # sched: [100:0.25]
-; ZNVER1-NEXT: wait # sched: [1:1.00]
-; ZNVER1-NEXT: fnstenv (%eax) # sched: [100:0.25]
-; ZNVER1-NEXT: wait # sched: [1:1.00]
-; ZNVER1-NEXT: fnstsw (%eax) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fstcw $0 \0A\09 fstenv $0 \0A\09 fstsw $0", "*m"(i8* %a0) nounwind
- ret void
-}
-
-define void @test_fnstcw_fnstenv_fnstsw(i8* %a0) optsize {
-; GENERIC-LABEL: test_fnstcw_fnstenv_fnstsw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fnstcw (%eax)
-; GENERIC-NEXT: fnstenv (%eax)
-; GENERIC-NEXT: fnstsw (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fnstcw_fnstenv_fnstsw:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fnstcw (%eax) # sched: [8:4.00]
-; ATOM-NEXT: fnstenv (%eax) # sched: [100:0.50]
-; ATOM-NEXT: fnstsw (%eax) # sched: [100:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fnstcw_fnstenv_fnstsw:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fnstcw (%eax) # sched: [1:0.50]
-; SLM-NEXT: fnstenv (%eax) # sched: [100:1.00]
-; SLM-NEXT: fnstsw (%eax) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fnstcw_fnstenv_fnstsw:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fnstcw (%eax) # sched: [7:1.00]
-; SANDY-NEXT: fnstenv (%eax) # sched: [100:0.33]
-; SANDY-NEXT: fnstsw (%eax) # sched: [7:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fnstcw_fnstenv_fnstsw:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fnstcw (%eax) # sched: [2:1.00]
-; HASWELL-NEXT: fnstenv (%eax) # sched: [115:19.50]
-; HASWELL-NEXT: fnstsw (%eax) # sched: [4:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fnstcw_fnstenv_fnstsw:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fnstcw (%eax) # sched: [2:1.00]
-; BROADWELL-NEXT: fnstenv (%eax) # sched: [115:19.50]
-; BROADWELL-NEXT: fnstsw (%eax) # sched: [4:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fnstcw_fnstenv_fnstsw:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fnstcw (%eax) # sched: [2:1.00]
-; SKYLAKE-NEXT: fnstenv (%eax) # sched: [106:19.50]
-; SKYLAKE-NEXT: fnstsw (%eax) # sched: [3:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fnstcw_fnstenv_fnstsw:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fnstcw (%eax) # sched: [2:1.00]
-; SKX-NEXT: fnstenv (%eax) # sched: [106:19.50]
-; SKX-NEXT: fnstsw (%eax) # sched: [3:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fnstcw_fnstenv_fnstsw:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fnstcw (%eax) # sched: [1:0.50]
-; BDVER2-NEXT: fnstenv (%eax) # sched: [100:0.50]
-; BDVER2-NEXT: fnstsw (%eax) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fnstcw_fnstenv_fnstsw:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fnstcw (%eax) # sched: [1:0.50]
-; BTVER2-NEXT: fnstenv (%eax) # sched: [100:0.50]
-; BTVER2-NEXT: fnstsw (%eax) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fnstcw_fnstenv_fnstsw:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fnstcw (%eax) # sched: [100:0.25]
-; ZNVER1-NEXT: fnstenv (%eax) # sched: [100:0.25]
-; ZNVER1-NEXT: fnstsw (%eax) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fnstcw $0 \0A\09 fnstenv $0 \0A\09 fnstsw $0", "*m"(i8* %a0) nounwind
- ret void
-}
-
-define void @test_fsub(float *%a0, double *%a1) optsize {
-; GENERIC-LABEL: test_fsub:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fsub %st, %st(1)
-; GENERIC-NEXT: fsub %st(2)
-; GENERIC-NEXT: fsubs (%ecx)
-; GENERIC-NEXT: fsubl (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fsub:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fsub %st, %st(1) # sched: [5:5.00]
-; ATOM-NEXT: fsub %st(2) # sched: [5:5.00]
-; ATOM-NEXT: fsubs (%ecx) # sched: [5:5.00]
-; ATOM-NEXT: fsubl (%eax) # sched: [5:5.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fsub:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fsub %st, %st(1) # sched: [3:1.00]
-; SLM-NEXT: fsub %st(2) # sched: [3:1.00]
-; SLM-NEXT: fsubs (%ecx) # sched: [6:1.00]
-; SLM-NEXT: fsubl (%eax) # sched: [6:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fsub:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fsub %st, %st(1) # sched: [3:1.00]
-; SANDY-NEXT: fsub %st(2) # sched: [3:1.00]
-; SANDY-NEXT: fsubs (%ecx) # sched: [10:1.00]
-; SANDY-NEXT: fsubl (%eax) # sched: [10:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fsub:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fsub %st, %st(1) # sched: [3:1.00]
-; HASWELL-NEXT: fsub %st(2) # sched: [3:1.00]
-; HASWELL-NEXT: fsubs (%ecx) # sched: [10:1.00]
-; HASWELL-NEXT: fsubl (%eax) # sched: [10:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fsub:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fsub %st, %st(1) # sched: [3:1.00]
-; BROADWELL-NEXT: fsub %st(2) # sched: [3:1.00]
-; BROADWELL-NEXT: fsubs (%ecx) # sched: [9:1.00]
-; BROADWELL-NEXT: fsubl (%eax) # sched: [9:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fsub:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fsub %st, %st(1) # sched: [3:1.00]
-; SKYLAKE-NEXT: fsub %st(2) # sched: [3:1.00]
-; SKYLAKE-NEXT: fsubs (%ecx) # sched: [10:1.00]
-; SKYLAKE-NEXT: fsubl (%eax) # sched: [10:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fsub:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fsub %st, %st(1) # sched: [3:1.00]
-; SKX-NEXT: fsub %st(2) # sched: [3:1.00]
-; SKX-NEXT: fsubs (%ecx) # sched: [10:1.00]
-; SKX-NEXT: fsubl (%eax) # sched: [10:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fsub:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fsub %st, %st(1) # sched: [5:1.00]
-; BDVER2-NEXT: fsub %st(2) # sched: [5:1.00]
-; BDVER2-NEXT: fsubs (%ecx) # sched: [10:1.00]
-; BDVER2-NEXT: fsubl (%eax) # sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fsub:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fsub %st, %st(1) # sched: [3:1.00]
-; BTVER2-NEXT: fsub %st(2) # sched: [3:1.00]
-; BTVER2-NEXT: fsubs (%ecx) # sched: [8:1.00]
-; BTVER2-NEXT: fsubl (%eax) # sched: [8:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fsub:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fsub %st, %st(1) # sched: [3:1.00]
-; ZNVER1-NEXT: fsub %st(2) # sched: [3:1.00]
-; ZNVER1-NEXT: fsubs (%ecx) # sched: [10:1.00]
-; ZNVER1-NEXT: fsubl (%eax) # sched: [10:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fsub %st(0), %st(1) \0A\09 fsub %st(2), %st(0) \0A\09 fsubs $0 \0A\09 fsubl $1", "*m,*m"(float *%a0, double *%a1) nounwind
- ret void
-}
-
-define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize {
-; GENERIC-LABEL: test_fsubp_fisub:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fsubp %st(1)
-; GENERIC-NEXT: fsubp %st(2)
-; GENERIC-NEXT: fisubs (%ecx)
-; GENERIC-NEXT: fisubl (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fsubp_fisub:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fsubp %st(1) # sched: [5:5.00]
-; ATOM-NEXT: fsubp %st(2) # sched: [5:5.00]
-; ATOM-NEXT: fisubs (%ecx) # sched: [5:5.00]
-; ATOM-NEXT: fisubl (%eax) # sched: [5:5.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fsubp_fisub:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fsubp %st(1) # sched: [3:1.00]
-; SLM-NEXT: fsubp %st(2) # sched: [3:1.00]
-; SLM-NEXT: fisubs (%ecx) # sched: [6:1.00]
-; SLM-NEXT: fisubl (%eax) # sched: [6:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fsubp_fisub:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fsubp %st(1) # sched: [3:1.00]
-; SANDY-NEXT: fsubp %st(2) # sched: [3:1.00]
-; SANDY-NEXT: fisubs (%ecx) # sched: [13:2.00]
-; SANDY-NEXT: fisubl (%eax) # sched: [13:2.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fsubp_fisub:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fsubp %st(1) # sched: [3:1.00]
-; HASWELL-NEXT: fsubp %st(2) # sched: [3:1.00]
-; HASWELL-NEXT: fisubs (%ecx) # sched: [13:2.00]
-; HASWELL-NEXT: fisubl (%eax) # sched: [13:2.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fsubp_fisub:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fsubp %st(1) # sched: [3:1.00]
-; BROADWELL-NEXT: fsubp %st(2) # sched: [3:1.00]
-; BROADWELL-NEXT: fisubs (%ecx) # sched: [12:2.00]
-; BROADWELL-NEXT: fisubl (%eax) # sched: [12:2.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fsubp_fisub:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fsubp %st(1) # sched: [3:1.00]
-; SKYLAKE-NEXT: fsubp %st(2) # sched: [3:1.00]
-; SKYLAKE-NEXT: fisubs (%ecx) # sched: [13:2.00]
-; SKYLAKE-NEXT: fisubl (%eax) # sched: [13:2.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fsubp_fisub:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fsubp %st(1) # sched: [3:1.00]
-; SKX-NEXT: fsubp %st(2) # sched: [3:1.00]
-; SKX-NEXT: fisubs (%ecx) # sched: [13:2.00]
-; SKX-NEXT: fisubl (%eax) # sched: [13:2.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fsubp_fisub:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fsubp %st(1) # sched: [5:1.00]
-; BDVER2-NEXT: fsubp %st(2) # sched: [5:1.00]
-; BDVER2-NEXT: fisubs (%ecx) # sched: [10:1.00]
-; BDVER2-NEXT: fisubl (%eax) # sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fsubp_fisub:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fsubp %st(1) # sched: [3:1.00]
-; BTVER2-NEXT: fsubp %st(2) # sched: [3:1.00]
-; BTVER2-NEXT: fisubs (%ecx) # sched: [8:1.00]
-; BTVER2-NEXT: fisubl (%eax) # sched: [8:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fsubp_fisub:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fsubp %st(1) # sched: [3:1.00]
-; ZNVER1-NEXT: fsubp %st(2) # sched: [3:1.00]
-; ZNVER1-NEXT: fisubs (%ecx) # sched: [10:1.00]
-; ZNVER1-NEXT: fisubl (%eax) # sched: [10:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fsubp \0A\09 fsubp %st(2), %st(0) \0A\09 fisubs $0 \0A\09 fisubl $1", "*m,*m"(i16 *%a0, i32 *%a1) nounwind
- ret void
-}
-
-define void @test_fsubr(float *%a0, double *%a1) optsize {
-; GENERIC-LABEL: test_fsubr:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fsubr %st, %st(1)
-; GENERIC-NEXT: fsubr %st(2)
-; GENERIC-NEXT: fsubrs (%ecx)
-; GENERIC-NEXT: fsubrl (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fsubr:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fsubr %st, %st(1) # sched: [5:5.00]
-; ATOM-NEXT: fsubr %st(2) # sched: [5:5.00]
-; ATOM-NEXT: fsubrs (%ecx) # sched: [5:5.00]
-; ATOM-NEXT: fsubrl (%eax) # sched: [5:5.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fsubr:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fsubr %st, %st(1) # sched: [3:1.00]
-; SLM-NEXT: fsubr %st(2) # sched: [3:1.00]
-; SLM-NEXT: fsubrs (%ecx) # sched: [6:1.00]
-; SLM-NEXT: fsubrl (%eax) # sched: [6:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fsubr:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fsubr %st, %st(1) # sched: [3:1.00]
-; SANDY-NEXT: fsubr %st(2) # sched: [3:1.00]
-; SANDY-NEXT: fsubrs (%ecx) # sched: [10:1.00]
-; SANDY-NEXT: fsubrl (%eax) # sched: [10:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fsubr:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fsubr %st, %st(1) # sched: [3:1.00]
-; HASWELL-NEXT: fsubr %st(2) # sched: [3:1.00]
-; HASWELL-NEXT: fsubrs (%ecx) # sched: [10:1.00]
-; HASWELL-NEXT: fsubrl (%eax) # sched: [10:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fsubr:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fsubr %st, %st(1) # sched: [3:1.00]
-; BROADWELL-NEXT: fsubr %st(2) # sched: [3:1.00]
-; BROADWELL-NEXT: fsubrs (%ecx) # sched: [9:1.00]
-; BROADWELL-NEXT: fsubrl (%eax) # sched: [9:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fsubr:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fsubr %st, %st(1) # sched: [3:1.00]
-; SKYLAKE-NEXT: fsubr %st(2) # sched: [3:1.00]
-; SKYLAKE-NEXT: fsubrs (%ecx) # sched: [10:1.00]
-; SKYLAKE-NEXT: fsubrl (%eax) # sched: [10:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fsubr:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fsubr %st, %st(1) # sched: [3:1.00]
-; SKX-NEXT: fsubr %st(2) # sched: [3:1.00]
-; SKX-NEXT: fsubrs (%ecx) # sched: [10:1.00]
-; SKX-NEXT: fsubrl (%eax) # sched: [10:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fsubr:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fsubr %st, %st(1) # sched: [5:1.00]
-; BDVER2-NEXT: fsubr %st(2) # sched: [5:1.00]
-; BDVER2-NEXT: fsubrs (%ecx) # sched: [10:1.00]
-; BDVER2-NEXT: fsubrl (%eax) # sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fsubr:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fsubr %st, %st(1) # sched: [3:1.00]
-; BTVER2-NEXT: fsubr %st(2) # sched: [3:1.00]
-; BTVER2-NEXT: fsubrs (%ecx) # sched: [8:1.00]
-; BTVER2-NEXT: fsubrl (%eax) # sched: [8:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fsubr:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fsubr %st, %st(1) # sched: [3:1.00]
-; ZNVER1-NEXT: fsubr %st(2) # sched: [3:1.00]
-; ZNVER1-NEXT: fsubrs (%ecx) # sched: [10:1.00]
-; ZNVER1-NEXT: fsubrl (%eax) # sched: [10:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fsubr %st(0), %st(1) \0A\09 fsubr %st(2), %st(0) \0A\09 fsubrs $0 \0A\09 fsubrl $1", "*m,*m"(float *%a0, double *%a1) nounwind
- ret void
-}
-
-define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize {
-; GENERIC-LABEL: test_fsubrp_fisubr:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fsubrp %st(1)
-; GENERIC-NEXT: fsubrp %st(2)
-; GENERIC-NEXT: fisubrs (%ecx)
-; GENERIC-NEXT: fisubrl (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fsubrp_fisubr:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fsubrp %st(1) # sched: [5:5.00]
-; ATOM-NEXT: fsubrp %st(2) # sched: [5:5.00]
-; ATOM-NEXT: fisubrs (%ecx) # sched: [5:5.00]
-; ATOM-NEXT: fisubrl (%eax) # sched: [5:5.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fsubrp_fisubr:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fsubrp %st(1) # sched: [3:1.00]
-; SLM-NEXT: fsubrp %st(2) # sched: [3:1.00]
-; SLM-NEXT: fisubrs (%ecx) # sched: [6:1.00]
-; SLM-NEXT: fisubrl (%eax) # sched: [6:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fsubrp_fisubr:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fsubrp %st(1) # sched: [3:1.00]
-; SANDY-NEXT: fsubrp %st(2) # sched: [3:1.00]
-; SANDY-NEXT: fisubrs (%ecx) # sched: [13:2.00]
-; SANDY-NEXT: fisubrl (%eax) # sched: [13:2.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fsubrp_fisubr:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fsubrp %st(1) # sched: [3:1.00]
-; HASWELL-NEXT: fsubrp %st(2) # sched: [3:1.00]
-; HASWELL-NEXT: fisubrs (%ecx) # sched: [13:2.00]
-; HASWELL-NEXT: fisubrl (%eax) # sched: [13:2.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fsubrp_fisubr:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fsubrp %st(1) # sched: [3:1.00]
-; BROADWELL-NEXT: fsubrp %st(2) # sched: [3:1.00]
-; BROADWELL-NEXT: fisubrs (%ecx) # sched: [12:2.00]
-; BROADWELL-NEXT: fisubrl (%eax) # sched: [12:2.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fsubrp_fisubr:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fsubrp %st(1) # sched: [3:1.00]
-; SKYLAKE-NEXT: fsubrp %st(2) # sched: [3:1.00]
-; SKYLAKE-NEXT: fisubrs (%ecx) # sched: [13:2.00]
-; SKYLAKE-NEXT: fisubrl (%eax) # sched: [13:2.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fsubrp_fisubr:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fsubrp %st(1) # sched: [3:1.00]
-; SKX-NEXT: fsubrp %st(2) # sched: [3:1.00]
-; SKX-NEXT: fisubrs (%ecx) # sched: [13:2.00]
-; SKX-NEXT: fisubrl (%eax) # sched: [13:2.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fsubrp_fisubr:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fsubrp %st(1) # sched: [5:1.00]
-; BDVER2-NEXT: fsubrp %st(2) # sched: [5:1.00]
-; BDVER2-NEXT: fisubrs (%ecx) # sched: [10:1.00]
-; BDVER2-NEXT: fisubrl (%eax) # sched: [10:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fsubrp_fisubr:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fsubrp %st(1) # sched: [3:1.00]
-; BTVER2-NEXT: fsubrp %st(2) # sched: [3:1.00]
-; BTVER2-NEXT: fisubrs (%ecx) # sched: [8:1.00]
-; BTVER2-NEXT: fisubrl (%eax) # sched: [8:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fsubrp_fisubr:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fsubrp %st(1) # sched: [3:1.00]
-; ZNVER1-NEXT: fsubrp %st(2) # sched: [3:1.00]
-; ZNVER1-NEXT: fisubrs (%ecx) # sched: [10:1.00]
-; ZNVER1-NEXT: fisubrl (%eax) # sched: [10:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fsubrp \0A\09 fsubrp %st(2), %st(0) \0A\09 fisubrs $0 \0A\09 fisubrl $1", "*m,*m"(i16 *%a0, i32 *%a1) nounwind
- ret void
-}
-
-define void @test_ftst() optsize {
-; GENERIC-LABEL: test_ftst:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: ftst
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_ftst:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: ftst # sched: [9:4.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_ftst:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: ftst # sched: [3:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_ftst:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: ftst # sched: [3:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_ftst:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: ftst # sched: [1:1.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_ftst:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: ftst # sched: [3:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_ftst:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: ftst # sched: [2:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_ftst:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: ftst # sched: [2:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_ftst:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: ftst # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_ftst:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: ftst # sched: [3:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_ftst:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: ftst # sched: [1:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "ftst", ""() nounwind
- ret void
-}
-
-define void @test_fucom_fucomp_fucompp() optsize {
-; GENERIC-LABEL: test_fucom_fucomp_fucompp:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fucom %st(1)
-; GENERIC-NEXT: fucom %st(3)
-; GENERIC-NEXT: fucomp %st(1)
-; GENERIC-NEXT: fucomp %st(3)
-; GENERIC-NEXT: fucompp
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fucom_fucomp_fucompp:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fucom %st(1) # sched: [1:1.00]
-; ATOM-NEXT: fucom %st(3) # sched: [1:1.00]
-; ATOM-NEXT: fucomp %st(1) # sched: [1:1.00]
-; ATOM-NEXT: fucomp %st(3) # sched: [1:1.00]
-; ATOM-NEXT: fucompp # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fucom_fucomp_fucompp:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fucom %st(1) # sched: [3:1.00]
-; SLM-NEXT: fucom %st(3) # sched: [3:1.00]
-; SLM-NEXT: fucomp %st(1) # sched: [3:1.00]
-; SLM-NEXT: fucomp %st(3) # sched: [3:1.00]
-; SLM-NEXT: fucompp # sched: [3:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fucom_fucomp_fucompp:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fucom %st(1) # sched: [1:1.00]
-; SANDY-NEXT: fucom %st(3) # sched: [1:1.00]
-; SANDY-NEXT: fucomp %st(1) # sched: [1:1.00]
-; SANDY-NEXT: fucomp %st(3) # sched: [1:1.00]
-; SANDY-NEXT: fucompp # sched: [3:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fucom_fucomp_fucompp:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fucom %st(1) # sched: [1:1.00]
-; HASWELL-NEXT: fucom %st(3) # sched: [1:1.00]
-; HASWELL-NEXT: fucomp %st(1) # sched: [1:1.00]
-; HASWELL-NEXT: fucomp %st(3) # sched: [1:1.00]
-; HASWELL-NEXT: fucompp # sched: [1:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fucom_fucomp_fucompp:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fucom %st(1) # sched: [1:1.00]
-; BROADWELL-NEXT: fucom %st(3) # sched: [1:1.00]
-; BROADWELL-NEXT: fucomp %st(1) # sched: [1:1.00]
-; BROADWELL-NEXT: fucomp %st(3) # sched: [1:1.00]
-; BROADWELL-NEXT: fucompp # sched: [3:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fucom_fucomp_fucompp:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fucom %st(1) # sched: [1:1.00]
-; SKYLAKE-NEXT: fucom %st(3) # sched: [1:1.00]
-; SKYLAKE-NEXT: fucomp %st(1) # sched: [1:1.00]
-; SKYLAKE-NEXT: fucomp %st(3) # sched: [1:1.00]
-; SKYLAKE-NEXT: fucompp # sched: [2:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fucom_fucomp_fucompp:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fucom %st(1) # sched: [1:1.00]
-; SKX-NEXT: fucom %st(3) # sched: [1:1.00]
-; SKX-NEXT: fucomp %st(1) # sched: [1:1.00]
-; SKX-NEXT: fucomp %st(3) # sched: [1:1.00]
-; SKX-NEXT: fucompp # sched: [2:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fucom_fucomp_fucompp:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fucom %st(1) # sched: [1:1.00]
-; BDVER2-NEXT: fucom %st(3) # sched: [1:1.00]
-; BDVER2-NEXT: fucomp %st(1) # sched: [1:1.00]
-; BDVER2-NEXT: fucomp %st(3) # sched: [1:1.00]
-; BDVER2-NEXT: fucompp # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fucom_fucomp_fucompp:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fucom %st(1) # sched: [3:1.00]
-; BTVER2-NEXT: fucom %st(3) # sched: [3:1.00]
-; BTVER2-NEXT: fucomp %st(1) # sched: [3:1.00]
-; BTVER2-NEXT: fucomp %st(3) # sched: [3:1.00]
-; BTVER2-NEXT: fucompp # sched: [3:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fucom_fucomp_fucompp:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fucom %st(1) # sched: [1:1.00]
-; ZNVER1-NEXT: fucom %st(3) # sched: [1:1.00]
-; ZNVER1-NEXT: fucomp %st(1) # sched: [1:1.00]
-; ZNVER1-NEXT: fucomp %st(3) # sched: [1:1.00]
-; ZNVER1-NEXT: fucompp # sched: [1:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fucom \0A\09 fucom %st(3) \0A\09 fucomp \0A\09 fucomp %st(3) \0A\09 fucompp", ""() nounwind
- ret void
-}
-
-define void @test_fucomi_fucomip() optsize {
-; GENERIC-LABEL: test_fucomi_fucomip:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fucomi %st(3)
-; GENERIC-NEXT: fucompi %st(3)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fucomi_fucomip:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fucomi %st(3) # sched: [9:4.50]
-; ATOM-NEXT: fucompi %st(3) # sched: [9:4.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fucomi_fucomip:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fucomi %st(3) # sched: [3:1.00]
-; SLM-NEXT: fucompi %st(3) # sched: [3:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fucomi_fucomip:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fucomi %st(3) # sched: [3:1.00]
-; SANDY-NEXT: fucompi %st(3) # sched: [3:1.00]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fucomi_fucomip:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fucomi %st(3) # sched: [1:0.50]
-; HASWELL-NEXT: fucompi %st(3) # sched: [1:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fucomi_fucomip:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fucomi %st(3) # sched: [3:1.00]
-; BROADWELL-NEXT: fucompi %st(3) # sched: [3:1.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fucomi_fucomip:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fucomi %st(3) # sched: [2:1.00]
-; SKYLAKE-NEXT: fucompi %st(3) # sched: [2:1.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fucomi_fucomip:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fucomi %st(3) # sched: [2:1.00]
-; SKX-NEXT: fucompi %st(3) # sched: [2:1.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fucomi_fucomip:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fucomi %st(3) # sched: [1:1.00]
-; BDVER2-NEXT: fucompi %st(3) # sched: [1:1.00]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fucomi_fucomip:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fucomi %st(3) # sched: [3:1.00]
-; BTVER2-NEXT: fucompi %st(3) # sched: [3:1.00]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fucomi_fucomip:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fucomi %st(3) # sched: [9:0.50]
-; ZNVER1-NEXT: fucompi %st(3) # sched: [9:0.50]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fucomi %st(3) \0A\09 fucomip %st(3)", ""() nounwind
- ret void
-}
-
-define void @test_fwait() optsize {
-; GENERIC-LABEL: test_fwait:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: wait
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fwait:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: wait # sched: [1:0.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fwait:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: wait # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fwait:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: wait # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fwait:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: wait # sched: [2:0.50]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fwait:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: wait # sched: [2:0.50]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fwait:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: wait # sched: [2:0.50]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fwait:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: wait # sched: [2:0.50]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fwait:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: wait # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fwait:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: wait # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fwait:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: wait # sched: [1:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fwait", ""() nounwind
- ret void
-}
-
-define void @test_fxam() optsize {
-; GENERIC-LABEL: test_fxam:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fxam
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fxam:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fxam # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fxam:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fxam # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fxam:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fxam # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fxam:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fxam # sched: [1:2.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fxam:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fxam # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fxam:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fxam # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fxam:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fxam # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fxam:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fxam # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fxam:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fxam # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fxam:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fxam # sched: [1:1.00]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fxam", ""() nounwind
- ret void
-}
-
-define void @test_fxch() optsize {
-; GENERIC-LABEL: test_fxch:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fxch %st(1)
-; GENERIC-NEXT: fxch %st(3)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fxch:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fxch %st(1) # sched: [1:1.00]
-; ATOM-NEXT: fxch %st(3) # sched: [1:1.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fxch:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fxch %st(1) # sched: [1:0.50]
-; SLM-NEXT: fxch %st(3) # sched: [1:0.50]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fxch:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fxch %st(1) # sched: [1:0.33]
-; SANDY-NEXT: fxch %st(3) # sched: [1:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fxch:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fxch %st(1) # sched: [17:4.00]
-; HASWELL-NEXT: fxch %st(3) # sched: [17:4.00]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fxch:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fxch %st(1) # sched: [14:4.00]
-; BROADWELL-NEXT: fxch %st(3) # sched: [14:4.00]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fxch:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fxch %st(1) # sched: [17:4.00]
-; SKYLAKE-NEXT: fxch %st(3) # sched: [17:4.00]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fxch:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fxch %st(1) # sched: [17:4.00]
-; SKX-NEXT: fxch %st(3) # sched: [17:4.00]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fxch:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fxch %st(1) # sched: [1:0.50]
-; BDVER2-NEXT: fxch %st(3) # sched: [1:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fxch:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fxch %st(1) # sched: [1:0.50]
-; BTVER2-NEXT: fxch %st(3) # sched: [1:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fxch:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fxch %st(1) # sched: [1:0.25]
-; ZNVER1-NEXT: fxch %st(3) # sched: [1:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fxch \0A\09 fxch %st(3)", ""() nounwind
- ret void
-}
-
-define void @test_fxrstor_fxsave(i8* %a0) optsize {
-; GENERIC-LABEL: test_fxrstor_fxsave:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fxrstor (%eax)
-; GENERIC-NEXT: fxsave (%eax)
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fxrstor_fxsave:
-; ATOM: # %bb.0:
-; ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [1:1.00]
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fxrstor (%eax) # sched: [141:70.50]
-; ATOM-NEXT: fxsave (%eax) # sched: [140:70.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fxrstor_fxsave:
-; SLM: # %bb.0:
-; SLM-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; SLM-NEXT: #APP
-; SLM-NEXT: fxrstor (%eax) # sched: [100:1.00]
-; SLM-NEXT: fxsave (%eax) # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fxrstor_fxsave:
-; SANDY: # %bb.0:
-; SANDY-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fxrstor (%eax) # sched: [5:2.00]
-; SANDY-NEXT: fxsave (%eax) # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fxrstor_fxsave:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fxrstor (%eax) # sched: [64:16.50]
-; HASWELL-NEXT: fxsave (%eax) # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fxrstor_fxsave:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fxrstor (%eax) # sched: [63:16.50]
-; BROADWELL-NEXT: fxsave (%eax) # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fxrstor_fxsave:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fxrstor (%eax) # sched: [63:16.50]
-; SKYLAKE-NEXT: fxsave (%eax) # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fxrstor_fxsave:
-; SKX: # %bb.0:
-; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; SKX-NEXT: #APP
-; SKX-NEXT: fxrstor (%eax) # sched: [63:16.50]
-; SKX-NEXT: fxsave (%eax) # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fxrstor_fxsave:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fxrstor (%eax) # sched: [100:0.50]
-; BDVER2-NEXT: fxsave (%eax) # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fxrstor_fxsave:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [3:1.00]
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fxrstor (%eax) # sched: [100:0.50]
-; BTVER2-NEXT: fxsave (%eax) # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fxrstor_fxsave:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fxrstor (%eax) # sched: [100:0.25]
-; ZNVER1-NEXT: fxsave (%eax) # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fxrstor $0 \0A\09 fxsave $0", "*m"(i8 *%a0) nounwind
- ret void
-}
-
-define void @test_fxtract() optsize {
-; GENERIC-LABEL: test_fxtract:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fxtract
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fxtract:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fxtract # sched: [25:12.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fxtract:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fxtract # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fxtract:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fxtract # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fxtract:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fxtract # sched: [15:4.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fxtract:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fxtract # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fxtract:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fxtract # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fxtract:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fxtract # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fxtract:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fxtract # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fxtract:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fxtract # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fxtract:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fxtract # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fxtract", ""() nounwind
- ret void
-}
-
-define void @test_fyl2x() optsize {
-; GENERIC-LABEL: test_fyl2x:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fyl2x
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fyl2x:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fyl2x # sched: [146:73.00]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fyl2x:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fyl2x # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fyl2x:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fyl2x # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fyl2x:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fyl2x # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fyl2x:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fyl2x # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fyl2x:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fyl2x # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fyl2x:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fyl2x # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fyl2x:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fyl2x # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fyl2x:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fyl2x # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fyl2x:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fyl2x # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fyl2x", ""() nounwind
- ret void
-}
-
-define void @test_fyl2xp1() optsize {
-; GENERIC-LABEL: test_fyl2xp1:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: fyl2xp1
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retl
-;
-; ATOM-LABEL: test_fyl2xp1:
-; ATOM: # %bb.0:
-; ATOM-NEXT: #APP
-; ATOM-NEXT: fyl2xp1 # sched: [147:73.50]
-; ATOM-NEXT: #NO_APP
-; ATOM-NEXT: retl # sched: [79:39.50]
-;
-; SLM-LABEL: test_fyl2xp1:
-; SLM: # %bb.0:
-; SLM-NEXT: #APP
-; SLM-NEXT: fyl2xp1 # sched: [100:1.00]
-; SLM-NEXT: #NO_APP
-; SLM-NEXT: retl # sched: [4:1.00]
-;
-; SANDY-LABEL: test_fyl2xp1:
-; SANDY: # %bb.0:
-; SANDY-NEXT: #APP
-; SANDY-NEXT: fyl2xp1 # sched: [100:0.33]
-; SANDY-NEXT: #NO_APP
-; SANDY-NEXT: retl # sched: [6:1.00]
-;
-; HASWELL-LABEL: test_fyl2xp1:
-; HASWELL: # %bb.0:
-; HASWELL-NEXT: #APP
-; HASWELL-NEXT: fyl2xp1 # sched: [100:0.25]
-; HASWELL-NEXT: #NO_APP
-; HASWELL-NEXT: retl # sched: [7:1.00]
-;
-; BROADWELL-LABEL: test_fyl2xp1:
-; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: #APP
-; BROADWELL-NEXT: fyl2xp1 # sched: [100:0.25]
-; BROADWELL-NEXT: #NO_APP
-; BROADWELL-NEXT: retl # sched: [6:0.50]
-;
-; SKYLAKE-LABEL: test_fyl2xp1:
-; SKYLAKE: # %bb.0:
-; SKYLAKE-NEXT: #APP
-; SKYLAKE-NEXT: fyl2xp1 # sched: [100:0.25]
-; SKYLAKE-NEXT: #NO_APP
-; SKYLAKE-NEXT: retl # sched: [6:0.50]
-;
-; SKX-LABEL: test_fyl2xp1:
-; SKX: # %bb.0:
-; SKX-NEXT: #APP
-; SKX-NEXT: fyl2xp1 # sched: [100:0.25]
-; SKX-NEXT: #NO_APP
-; SKX-NEXT: retl # sched: [6:0.50]
-;
-; BDVER2-LABEL: test_fyl2xp1:
-; BDVER2: # %bb.0:
-; BDVER2-NEXT: #APP
-; BDVER2-NEXT: fyl2xp1 # sched: [100:0.50]
-; BDVER2-NEXT: #NO_APP
-; BDVER2-NEXT: retl # sched: [5:1.00]
-;
-; BTVER2-LABEL: test_fyl2xp1:
-; BTVER2: # %bb.0:
-; BTVER2-NEXT: #APP
-; BTVER2-NEXT: fyl2xp1 # sched: [100:0.50]
-; BTVER2-NEXT: #NO_APP
-; BTVER2-NEXT: retl # sched: [4:1.00]
-;
-; ZNVER1-LABEL: test_fyl2xp1:
-; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: #APP
-; ZNVER1-NEXT: fyl2xp1 # sched: [100:0.25]
-; ZNVER1-NEXT: #NO_APP
-; ZNVER1-NEXT: retl # sched: [1:0.50]
- tail call void asm sideeffect "fyl2xp1", ""() nounwind
- ret void
-}
diff --git a/llvm/test/CodeGen/X86/xop-schedule.ll b/llvm/test/CodeGen/X86/xop-schedule.ll
deleted file mode 100644
index ed621039d38..00000000000
--- a/llvm/test/CodeGen/X86/xop-schedule.ll
+++ /dev/null
@@ -1,1818 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+xop | FileCheck %s --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER1
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4
-
-define void @test_vfrczpd(<2 x double> %a0, <4 x double> %a1, <2 x double> *%a2, <4 x double> *%a3) {
-; GENERIC-LABEL: test_vfrczpd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfrczpd %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vfrczpd %ymm1, %ymm1 # sched: [3:1.00]
-; GENERIC-NEXT: vfrczpd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: vfrczpd (%rsi), %ymm1 # sched: [10:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfrczpd:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfrczpd %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER12-NEXT: vfrczpd %ymm1, %ymm1 # sched: [10:2.00]
-; BDVER12-NEXT: vfrczpd (%rdi), %xmm0 # sched: [15:1.00]
-; BDVER12-NEXT: vfrczpd (%rsi), %ymm1 # sched: [15:2.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vfrczpd:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vfrczpd %xmm0, %xmm0
-; BDVER3-NEXT: vfrczpd %ymm1, %ymm1
-; BDVER3-NEXT: vfrczpd (%rdi), %xmm0
-; BDVER3-NEXT: vfrczpd (%rsi), %ymm1
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: vzeroupper
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vfrczpd:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vfrczpd %xmm0, %xmm0
-; BDVER4-NEXT: vfrczpd %ymm1, %ymm1
-; BDVER4-NEXT: vfrczpd (%rdi), %xmm0
-; BDVER4-NEXT: vfrczpd (%rsi), %ymm1
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: vzeroupper
-; BDVER4-NEXT: retq
- call void asm sideeffect "vfrczpd $0, $0 \0a\09 vfrczpd $1, $1 \0a\09 vfrczpd $2, $0 \0a\09 vfrczpd $3, $1", "x,x,*m,*m"(<2 x double> %a0, <4 x double> %a1, <2 x double> *%a2, <4 x double> *%a3)
- ret void
-}
-
-define void @test_vfrczps(<4 x float> %a0, <4 x double> %a1, <4 x float> *%a2, <4 x double> *%a3) {
-; GENERIC-LABEL: test_vfrczps:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfrczps %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vfrczps %ymm1, %ymm1 # sched: [3:1.00]
-; GENERIC-NEXT: vfrczps (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: vfrczps (%rsi), %ymm1 # sched: [10:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfrczps:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfrczps %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER12-NEXT: vfrczps %ymm1, %ymm1 # sched: [10:2.00]
-; BDVER12-NEXT: vfrczps (%rdi), %xmm0 # sched: [15:1.00]
-; BDVER12-NEXT: vfrczps (%rsi), %ymm1 # sched: [15:2.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vfrczps:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vfrczps %xmm0, %xmm0
-; BDVER3-NEXT: vfrczps %ymm1, %ymm1
-; BDVER3-NEXT: vfrczps (%rdi), %xmm0
-; BDVER3-NEXT: vfrczps (%rsi), %ymm1
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: vzeroupper
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vfrczps:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vfrczps %xmm0, %xmm0
-; BDVER4-NEXT: vfrczps %ymm1, %ymm1
-; BDVER4-NEXT: vfrczps (%rdi), %xmm0
-; BDVER4-NEXT: vfrczps (%rsi), %ymm1
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: vzeroupper
-; BDVER4-NEXT: retq
- call void asm sideeffect "vfrczps $0, $0 \0a\09 vfrczps $1, $1 \0a\09 vfrczps $2, $0 \0a\09 vfrczps $3, $1", "x,x,*m,*m"(<4 x float> %a0, <4 x double> %a1, <4 x float> *%a2, <4 x double> *%a3)
- ret void
-}
-
-define void @test_vfrczsd(<2 x double> %a0, <2 x double> *%a1) {
-; GENERIC-LABEL: test_vfrczsd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfrczsd %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vfrczsd (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfrczsd:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfrczsd %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER12-NEXT: vfrczsd (%rdi), %xmm0 # sched: [15:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vfrczsd:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vfrczsd %xmm0, %xmm0
-; BDVER3-NEXT: vfrczsd (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vfrczsd:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vfrczsd %xmm0, %xmm0
-; BDVER4-NEXT: vfrczsd (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vfrczsd $0, $0 \0a\09 vfrczsd $1, $0", "x,*m"(<2 x double> %a0, <2 x double> *%a1)
- ret void
-}
-
-define void @test_vfrczss(<4 x float> %a0, <4 x double> *%a1) {
-; GENERIC-LABEL: test_vfrczss:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vfrczss %xmm0, %xmm0 # sched: [3:1.00]
-; GENERIC-NEXT: vfrczss (%rdi), %xmm0 # sched: [9:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vfrczss:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vfrczss %xmm0, %xmm0 # sched: [10:1.00]
-; BDVER12-NEXT: vfrczss (%rdi), %xmm0 # sched: [15:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vfrczss:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vfrczss %xmm0, %xmm0
-; BDVER3-NEXT: vfrczss (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vfrczss:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vfrczss %xmm0, %xmm0
-; BDVER4-NEXT: vfrczss (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vfrczss $0, $0 \0a\09 vfrczss $1, $0", "x,*m"(<4 x float> %a0, <4 x double> *%a1)
- ret void
-}
-
-define void @test_vpcmov_128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) {
-; GENERIC-LABEL: test_vpcmov_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpcmov_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpcmov_128:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpcmov_128:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpcmov (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpcmov %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpcmov $2, $1, $0, $0 \0a\09 vpcmov $3, $1, $0, $0 \0a\09 vpcmov $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
- ret void
-}
-
-define void @test_vpcmov_256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2, <4 x i64> *%a3) {
-; GENERIC-LABEL: test_vpcmov_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpcmov_256:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:0.50]
-; BDVER12-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0 # sched: [7:1.00]
-; BDVER12-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0 # sched: [7:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpcmov_256:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER3-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER3-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: vzeroupper
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpcmov_256:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER4-NEXT: vpcmov (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER4-NEXT: vpcmov %ymm2, (%rdi), %ymm0, %ymm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: vzeroupper
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpcmov $2, $1, $0, $0 \0a\09 vpcmov $3, $1, $0, $0 \0a\09 vpcmov $2, $3, $0, $0", "x,x,x,*m"(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2, <4 x i64> *%a3)
- ret void
-}
-
-define void @test_vpcom(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_vpcom:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpcom:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpcom:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpcom:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpcomb $3, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpcomd $3, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpcomq $3, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpcomw $3, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpcomb $3, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vpcomd $3, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vpcomq $3, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vpcomw $3, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpcomb $3, $1, $0, $0 \0a\09 vpcomd $3, $1, $0, $0 \0a\09 vpcomq $3, $1, $0, $0 \0a\09 vpcomw $3, $1, $0, $0 \0a\09 vpcomb $3, $2, $0, $0 \0a\09 vpcomd $3, $2, $0, $0 \0a\09 vpcomq $3, $2, $0, $0 \0a\09 vpcomw $3, $2, $0, $0", "x,x,*m,i"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2, i8 3)
- ret void
-}
-
-define void @test_vpcomu(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_vpcomu:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpcomu:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpcomu:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpcomu:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpcomub $3, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpcomud $3, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpcomuq $3, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpcomuw $3, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpcomub $3, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vpcomud $3, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vpcomuq $3, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vpcomuw $3, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpcomub $3, $1, $0, $0 \0a\09 vpcomud $3, $1, $0, $0 \0a\09 vpcomuq $3, $1, $0, $0 \0a\09 vpcomuw $3, $1, $0, $0 \0a\09 vpcomub $3, $2, $0, $0 \0a\09 vpcomud $3, $2, $0, $0 \0a\09 vpcomuq $3, $2, $0, $0 \0a\09 vpcomuw $3, $2, $0, $0", "x,x,*m,i"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2, i8 3)
- ret void
-}
-
-define void @test_vpermil2pd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) {
-; GENERIC-LABEL: test_vpermil2pd_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpermil2pd_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BDVER12-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BDVER12-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [8:2.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpermil2pd_128:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpermil2pd_128:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpermil2pd $3, %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpermil2pd $3, %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vpermil2pd $3, (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpermil2pd $4, $2, $1, $0, $0 \0a\09 vpermil2pd $4, $2, $3, $0, $0 \0a\09 vpermil2pd $4, $3, $1, $0, $0", "x,x,x,*m,i"(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3, i8 3)
- ret void
-}
-
-define void @test_vpermil2pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) {
-; GENERIC-LABEL: test_vpermil2pd_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpermil2pd_256:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
-; BDVER12-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
-; BDVER12-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:3.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpermil2pd_256:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER3-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0
-; BDVER3-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: vzeroupper
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpermil2pd_256:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpermil2pd $3, %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER4-NEXT: vpermil2pd $3, %ymm2, (%rdi), %ymm0, %ymm0
-; BDVER4-NEXT: vpermil2pd $3, (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: vzeroupper
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpermil2pd $4, $2, $1, $0, $0 \0a\09 vpermil2pd $4, $2, $3, $0, $0 \0a\09 vpermil2pd $4, $3, $1, $0, $0", "x,x,x,*m,i"(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3, i8 3)
- ret void
-}
-
-define void @test_vpermil2ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) {
-; GENERIC-LABEL: test_vpermil2ps_128:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpermil2ps_128:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BDVER12-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BDVER12-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0 # sched: [8:2.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpermil2ps_128:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpermil2ps_128:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpermil2ps $3, %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpermil2ps $3, %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vpermil2ps $3, (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpermil2ps $4, $2, $1, $0, $0 \0a\09 vpermil2ps $4, $2, $3, $0, $0 \0a\09 vpermil2ps $4, $3, $1, $0, $0", "x,x,x,*m,i"(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3, i8 3)
- ret void
-}
-
-define void @test_vpermil2ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) {
-; GENERIC-LABEL: test_vpermil2ps_256:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: vzeroupper # sched: [1:1.00]
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpermil2ps_256:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0 # sched: [3:3.00]
-; BDVER12-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0 # sched: [8:3.00]
-; BDVER12-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0 # sched: [8:3.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: vzeroupper # sched: [46:4.00]
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpermil2ps_256:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER3-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0
-; BDVER3-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: vzeroupper
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpermil2ps_256:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpermil2ps $3, %ymm2, %ymm1, %ymm0, %ymm0
-; BDVER4-NEXT: vpermil2ps $3, %ymm2, (%rdi), %ymm0, %ymm0
-; BDVER4-NEXT: vpermil2ps $3, (%rdi), %ymm1, %ymm0, %ymm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: vzeroupper
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpermil2ps $4, $2, $1, $0, $0 \0a\09 vpermil2ps $4, $2, $3, $0, $0 \0a\09 vpermil2ps $4, $3, $1, $0, $0", "x,x,x,*m,i"(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3, i8 3)
- ret void
-}
-
-define void @test_vphaddbd(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_vphaddbd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vphaddbd %xmm0, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphaddbd (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vphaddbd:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vphaddbd %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vphaddbd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vphaddbd:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vphaddbd %xmm0, %xmm0
-; BDVER3-NEXT: vphaddbd (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vphaddbd:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vphaddbd %xmm0, %xmm0
-; BDVER4-NEXT: vphaddbd (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vphaddbd $0, $0 \0a\09 vphaddbd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
- ret void
-}
-
-define void @test_vphaddbq(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_vphaddbq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vphaddbq %xmm0, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphaddbq (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vphaddbq:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vphaddbq %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vphaddbq (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vphaddbq:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vphaddbq %xmm0, %xmm0
-; BDVER3-NEXT: vphaddbq (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vphaddbq:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vphaddbq %xmm0, %xmm0
-; BDVER4-NEXT: vphaddbq (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vphaddbq $0, $0 \0a\09 vphaddbq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
- ret void
-}
-
-define void @test_vphaddbw(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_vphaddbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vphaddbw %xmm0, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphaddbw (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vphaddbw:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vphaddbw %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vphaddbw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vphaddbw:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vphaddbw %xmm0, %xmm0
-; BDVER3-NEXT: vphaddbw (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vphaddbw:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vphaddbw %xmm0, %xmm0
-; BDVER4-NEXT: vphaddbw (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vphaddbw $0, $0 \0a\09 vphaddbw $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
- ret void
-}
-
-define void @test_vphadddq(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_vphadddq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vphadddq %xmm0, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphadddq (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vphadddq:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vphadddq %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vphadddq (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vphadddq:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vphadddq %xmm0, %xmm0
-; BDVER3-NEXT: vphadddq (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vphadddq:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vphadddq %xmm0, %xmm0
-; BDVER4-NEXT: vphadddq (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vphadddq $0, $0 \0a\09 vphadddq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
- ret void
-}
-
-define void @test_vphaddubd(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_vphaddubd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vphaddubd %xmm0, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphaddubd (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vphaddubd:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vphaddubd %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vphaddubd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vphaddubd:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vphaddubd %xmm0, %xmm0
-; BDVER3-NEXT: vphaddubd (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vphaddubd:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vphaddubd %xmm0, %xmm0
-; BDVER4-NEXT: vphaddubd (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vphaddubd $0, $0 \0a\09 vphaddubd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
- ret void
-}
-
-define void @test_vphaddubq(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_vphaddubq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vphaddubq %xmm0, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphaddubq (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vphaddubq:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vphaddubq %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vphaddubq (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vphaddubq:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vphaddubq %xmm0, %xmm0
-; BDVER3-NEXT: vphaddubq (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vphaddubq:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vphaddubq %xmm0, %xmm0
-; BDVER4-NEXT: vphaddubq (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vphaddubq $0, $0 \0a\09 vphaddubq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
- ret void
-}
-
-define void @test_vphaddubw(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_vphaddubw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vphaddubw %xmm0, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphaddubw (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vphaddubw:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vphaddubw %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vphaddubw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vphaddubw:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vphaddubw %xmm0, %xmm0
-; BDVER3-NEXT: vphaddubw (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vphaddubw:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vphaddubw %xmm0, %xmm0
-; BDVER4-NEXT: vphaddubw (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vphaddubw $0, $0 \0a\09 vphaddubw $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
- ret void
-}
-
-define void @test_vphaddudq(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_vphaddudq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vphaddudq %xmm0, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphaddudq (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vphaddudq:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vphaddudq %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vphaddudq (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vphaddudq:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vphaddudq %xmm0, %xmm0
-; BDVER3-NEXT: vphaddudq (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vphaddudq:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vphaddudq %xmm0, %xmm0
-; BDVER4-NEXT: vphaddudq (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vphaddudq $0, $0 \0a\09 vphaddudq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
- ret void
-}
-
-define void @test_vphadduwd(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_vphadduwd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vphadduwd %xmm0, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphadduwd (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vphadduwd:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vphadduwd %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vphadduwd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vphadduwd:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vphadduwd %xmm0, %xmm0
-; BDVER3-NEXT: vphadduwd (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vphadduwd:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vphadduwd %xmm0, %xmm0
-; BDVER4-NEXT: vphadduwd (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vphadduwd $0, $0 \0a\09 vphadduwd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
- ret void
-}
-
-define void @test_vphadduwq(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_vphadduwq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vphadduwq %xmm0, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphadduwq (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vphadduwq:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vphadduwq %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vphadduwq (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vphadduwq:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vphadduwq %xmm0, %xmm0
-; BDVER3-NEXT: vphadduwq (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vphadduwq:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vphadduwq %xmm0, %xmm0
-; BDVER4-NEXT: vphadduwq (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vphadduwq $0, $0 \0a\09 vphadduwq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
- ret void
-}
-
-define void @test_vphaddwd(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_vphaddwd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vphaddwd %xmm0, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphaddwd (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vphaddwd:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vphaddwd %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vphaddwd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vphaddwd:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vphaddwd %xmm0, %xmm0
-; BDVER3-NEXT: vphaddwd (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vphaddwd:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vphaddwd %xmm0, %xmm0
-; BDVER4-NEXT: vphaddwd (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vphaddwd $0, $0 \0a\09 vphaddwd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
- ret void
-}
-
-define void @test_vphaddwq(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_vphaddwq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vphaddwq %xmm0, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphaddwq (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vphaddwq:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vphaddwq %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vphaddwq (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vphaddwq:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vphaddwq %xmm0, %xmm0
-; BDVER3-NEXT: vphaddwq (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vphaddwq:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vphaddwq %xmm0, %xmm0
-; BDVER4-NEXT: vphaddwq (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vphaddwq $0, $0 \0a\09 vphaddwq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
- ret void
-}
-
-define void @test_vphsubbw(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_vphsubbw:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vphsubbw %xmm0, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphsubbw (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vphsubbw:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vphsubbw %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vphsubbw (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vphsubbw:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vphsubbw %xmm0, %xmm0
-; BDVER3-NEXT: vphsubbw (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vphsubbw:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vphsubbw %xmm0, %xmm0
-; BDVER4-NEXT: vphsubbw (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vphsubbw $0, $0 \0a\09 vphsubbw $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
- ret void
-}
-
-define void @test_vphsubdq(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_vphsubdq:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vphsubdq %xmm0, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphsubdq (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vphsubdq:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vphsubdq %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vphsubdq (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vphsubdq:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vphsubdq %xmm0, %xmm0
-; BDVER3-NEXT: vphsubdq (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vphsubdq:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vphsubdq %xmm0, %xmm0
-; BDVER4-NEXT: vphsubdq (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vphsubdq $0, $0 \0a\09 vphsubdq $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
- ret void
-}
-
-define void @test_vphsubwd(<2 x i64> %a0, <2 x i64> *%a1) {
-; GENERIC-LABEL: test_vphsubwd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vphsubwd %xmm0, %xmm0 # sched: [3:1.50]
-; GENERIC-NEXT: vphsubwd (%rdi), %xmm0 # sched: [9:1.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vphsubwd:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vphsubwd %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vphsubwd (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vphsubwd:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vphsubwd %xmm0, %xmm0
-; BDVER3-NEXT: vphsubwd (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vphsubwd:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vphsubwd %xmm0, %xmm0
-; BDVER4-NEXT: vphsubwd (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vphsubwd $0, $0 \0a\09 vphsubwd $1, $0", "x,*m"(<2 x i64> %a0, <2 x i64> *%a1)
- ret void
-}
-
-define void @test_vpmacsdd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) {
-; GENERIC-LABEL: test_vpmacsdd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpmacsdd:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; BDVER12-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpmacsdd:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpmacsdd:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpmacsdd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpmacsdd $2, $1, $0, $0 \0a\09 vpmacsdd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
- ret void
-}
-
-define void @test_vpmacsdqh(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) {
-; GENERIC-LABEL: test_vpmacsdqh:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpmacsdqh:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
-; BDVER12-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpmacsdqh:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpmacsdqh:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpmacsdqh %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpmacsdqh $2, $1, $0, $0 \0a\09 vpmacsdqh $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
- ret void
-}
-
-define void @test_vpmacsdql(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) {
-; GENERIC-LABEL: test_vpmacsdql:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpmacsdql:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
-; BDVER12-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpmacsdql:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpmacsdql:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpmacsdql %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpmacsdql $2, $1, $0, $0 \0a\09 vpmacsdql $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
- ret void
-}
-
-define void @test_vpmacssdd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) {
-; GENERIC-LABEL: test_vpmacssdd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpmacssdd:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
-; BDVER12-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpmacssdd:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpmacssdd:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpmacssdd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpmacssdd $2, $1, $0, $0 \0a\09 vpmacssdd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
- ret void
-}
-
-define void @test_vpmacssdqh(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) {
-; GENERIC-LABEL: test_vpmacssdqh:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpmacssdqh:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
-; BDVER12-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpmacssdqh:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpmacssdqh:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpmacssdqh %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpmacssdqh $2, $1, $0, $0 \0a\09 vpmacssdqh $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
- ret void
-}
-
-define void @test_vpmacssdql(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) {
-; GENERIC-LABEL: test_vpmacssdql:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpmacssdql:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
-; BDVER12-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpmacssdql:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpmacssdql:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpmacssdql %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpmacssdql $2, $1, $0, $0 \0a\09 vpmacssdql $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
- ret void
-}
-
-define void @test_vpmacsswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) {
-; GENERIC-LABEL: test_vpmacsswd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpmacsswd:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER12-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpmacsswd:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpmacsswd:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpmacsswd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpmacsswd $2, $1, $0, $0 \0a\09 vpmacsswd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
- ret void
-}
-
-define void @test_vpmacssww(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) {
-; GENERIC-LABEL: test_vpmacssww:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpmacssww:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER12-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpmacssww:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpmacssww:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpmacssww %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpmacssww $2, $1, $0, $0 \0a\09 vpmacssww $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
- ret void
-}
-
-define void @test_vpmacswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) {
-; GENERIC-LABEL: test_vpmacswd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpmacswd:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER12-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpmacswd:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpmacswd:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpmacswd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpmacswd $2, $1, $0, $0 \0a\09 vpmacswd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
- ret void
-}
-
-define void @test_vpmacsww(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) {
-; GENERIC-LABEL: test_vpmacsww:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpmacsww:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER12-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpmacsww:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpmacsww:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpmacsww %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpmacsww $2, $1, $0, $0 \0a\09 vpmacsww $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
- ret void
-}
-
-define void @test_vpmadcsswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) {
-; GENERIC-LABEL: test_vpmadcsswd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpmadcsswd:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER12-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpmadcsswd:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpmadcsswd:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpmadcsswd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpmadcsswd $2, $1, $0, $0 \0a\09 vpmadcsswd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
- ret void
-}
-
-define void @test_vpmadcswd(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) {
-; GENERIC-LABEL: test_vpmadcswd:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
-; GENERIC-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpmadcswd:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
-; BDVER12-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpmadcswd:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpmadcswd:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpmadcswd %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpmadcswd $2, $1, $0, $0 \0a\09 vpmadcswd $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
- ret void
-}
-
-define void @test_vpperm(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3) {
-; GENERIC-LABEL: test_vpperm:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
-; GENERIC-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpperm:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
-; BDVER12-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0 # sched: [8:2.00]
-; BDVER12-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpperm:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpperm:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpperm %xmm2, %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpperm (%rdi), %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpperm %xmm2, (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpperm $2, $1, $0, $0 \0A\09 vpperm $3, $1, $0, $0 \0A\09 vpperm $2, $3, $0, $0", "x,x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2, <2 x i64> *%a3)
- ret void
-}
-
-define void @test_vprot(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_vprot:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vprotb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vprotd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vprotq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vprotw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vprotb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vprotd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vprotq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vprotw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vprotb %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vprotd %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vprotq %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vprotw %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vprotb $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vprotd $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vprotq $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vprotw $7, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vprotb $7, (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vprotd $7, (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vprotq $7, (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vprotw $7, (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vprot:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vprotb %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER12-NEXT: vprotd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER12-NEXT: vprotq %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER12-NEXT: vprotw %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER12-NEXT: vprotb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vprotd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vprotq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vprotw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vprotb %xmm0, (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vprotd %xmm0, (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vprotq %xmm0, (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vprotw %xmm0, (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vprotb $7, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vprotd $7, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vprotq $7, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vprotw $7, %xmm0, %xmm0 # sched: [2:0.50]
-; BDVER12-NEXT: vprotb $7, (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: vprotd $7, (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: vprotq $7, (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: vprotw $7, (%rdi), %xmm0 # sched: [7:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vprot:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vprotb %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vprotd %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vprotq %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vprotw %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vprotb (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vprotd (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vprotq (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vprotw (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vprotb %xmm0, (%rdi), %xmm0
-; BDVER3-NEXT: vprotd %xmm0, (%rdi), %xmm0
-; BDVER3-NEXT: vprotq %xmm0, (%rdi), %xmm0
-; BDVER3-NEXT: vprotw %xmm0, (%rdi), %xmm0
-; BDVER3-NEXT: vprotb $7, %xmm0, %xmm0
-; BDVER3-NEXT: vprotd $7, %xmm0, %xmm0
-; BDVER3-NEXT: vprotq $7, %xmm0, %xmm0
-; BDVER3-NEXT: vprotw $7, %xmm0, %xmm0
-; BDVER3-NEXT: vprotb $7, (%rdi), %xmm0
-; BDVER3-NEXT: vprotd $7, (%rdi), %xmm0
-; BDVER3-NEXT: vprotq $7, (%rdi), %xmm0
-; BDVER3-NEXT: vprotw $7, (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vprot:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vprotb %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vprotd %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vprotq %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vprotw %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vprotb (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vprotd (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vprotq (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vprotw (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vprotb %xmm0, (%rdi), %xmm0
-; BDVER4-NEXT: vprotd %xmm0, (%rdi), %xmm0
-; BDVER4-NEXT: vprotq %xmm0, (%rdi), %xmm0
-; BDVER4-NEXT: vprotw %xmm0, (%rdi), %xmm0
-; BDVER4-NEXT: vprotb $7, %xmm0, %xmm0
-; BDVER4-NEXT: vprotd $7, %xmm0, %xmm0
-; BDVER4-NEXT: vprotq $7, %xmm0, %xmm0
-; BDVER4-NEXT: vprotw $7, %xmm0, %xmm0
-; BDVER4-NEXT: vprotb $7, (%rdi), %xmm0
-; BDVER4-NEXT: vprotd $7, (%rdi), %xmm0
-; BDVER4-NEXT: vprotq $7, (%rdi), %xmm0
-; BDVER4-NEXT: vprotw $7, (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vprotb $1, $0, $0 \0A\09 vprotd $1, $0, $0 \0A\09 vprotq $1, $0, $0 \0A\09 vprotw $1, $0, $0 \0A\09 vprotb $2, $0, $0 \0A\09 vprotd $2, $0, $0 \0A\09 vprotq $2, $0, $0 \0A\09 vprotw $2, $0, $0 \0A\09 vprotb $0, $2, $0 \0A\09 vprotd $0, $2, $0 \0A\09 vprotq $0, $2, $0 \0A\09 vprotw $0, $2, $0 \0A\09 vprotb $3, $0, $0 \0A\09 vprotd $3, $0, $0 \0A\09 vprotq $3, $0, $0 \0A\09 vprotw $3, $0, $0 \0A\09 vprotb $3, $2, $0 \0A\09 vprotd $3, $2, $0 \0A\09 vprotq $3, $2, $0 \0A\09 vprotw $3, $2, $0", "x,x,*m,i"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2, i8 7)
- ret void
-}
-
-define void @test_vpsha(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_vpsha:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpshab %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpshad %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpshaq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpshaw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpshab (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vpshad (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vpshaq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vpshaw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vpshab %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vpshad %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vpshaq %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vpshaw %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpsha:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpshab %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER12-NEXT: vpshad %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER12-NEXT: vpshaq %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER12-NEXT: vpshaw %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER12-NEXT: vpshab (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vpshad (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vpshaq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vpshaw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vpshab %xmm0, (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vpshad %xmm0, (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vpshaq %xmm0, (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vpshaw %xmm0, (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpsha:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpshab %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpshad %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpshaq %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpshaw %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpshab (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vpshad (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vpshaq (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vpshaw (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vpshab %xmm0, (%rdi), %xmm0
-; BDVER3-NEXT: vpshad %xmm0, (%rdi), %xmm0
-; BDVER3-NEXT: vpshaq %xmm0, (%rdi), %xmm0
-; BDVER3-NEXT: vpshaw %xmm0, (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpsha:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpshab %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpshad %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpshaq %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpshaw %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpshab (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vpshad (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vpshaq (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vpshaw (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vpshab %xmm0, (%rdi), %xmm0
-; BDVER4-NEXT: vpshad %xmm0, (%rdi), %xmm0
-; BDVER4-NEXT: vpshaq %xmm0, (%rdi), %xmm0
-; BDVER4-NEXT: vpshaw %xmm0, (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpshab $1, $0, $0 \0A\09 vpshad $1, $0, $0 \0A\09 vpshaq $1, $0, $0 \0A\09 vpshaw $1, $0, $0 \0A\09 vpshab $2, $0, $0 \0A\09 vpshad $2, $0, $0 \0A\09 vpshaq $2, $0, $0 \0A\09 vpshaw $2, $0, $0 \0A\09 vpshab $0, $2, $0 \0A\09 vpshad $0, $2, $0 \0A\09 vpshaq $0, $2, $0 \0A\09 vpshaw $0, $2, $0", "x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
- ret void
-}
-
-define void @test_vpshl(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
-; GENERIC-LABEL: test_vpshl:
-; GENERIC: # %bb.0:
-; GENERIC-NEXT: #APP
-; GENERIC-NEXT: vpshlb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpshld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpshlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpshlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpshlb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vpshld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vpshlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vpshlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vpshlb %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vpshld %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vpshlq %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: vpshlw %xmm0, (%rdi), %xmm0 # sched: [7:1.00]
-; GENERIC-NEXT: #NO_APP
-; GENERIC-NEXT: retq # sched: [1:1.00]
-;
-; BDVER12-LABEL: test_vpshl:
-; BDVER12: # %bb.0:
-; BDVER12-NEXT: #APP
-; BDVER12-NEXT: vpshlb %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER12-NEXT: vpshld %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER12-NEXT: vpshlq %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER12-NEXT: vpshlw %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
-; BDVER12-NEXT: vpshlb (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vpshld (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vpshlq (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vpshlw (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vpshlb %xmm0, (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vpshld %xmm0, (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vpshlq %xmm0, (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: vpshlw %xmm0, (%rdi), %xmm0 # sched: [8:0.50]
-; BDVER12-NEXT: #NO_APP
-; BDVER12-NEXT: retq # sched: [5:1.00]
-;
-; BDVER3-LABEL: test_vpshl:
-; BDVER3: # %bb.0:
-; BDVER3-NEXT: #APP
-; BDVER3-NEXT: vpshlb %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpshld %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpshlq %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpshlw %xmm1, %xmm0, %xmm0
-; BDVER3-NEXT: vpshlb (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vpshld (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vpshlq (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vpshlw (%rdi), %xmm0, %xmm0
-; BDVER3-NEXT: vpshlb %xmm0, (%rdi), %xmm0
-; BDVER3-NEXT: vpshld %xmm0, (%rdi), %xmm0
-; BDVER3-NEXT: vpshlq %xmm0, (%rdi), %xmm0
-; BDVER3-NEXT: vpshlw %xmm0, (%rdi), %xmm0
-; BDVER3-NEXT: #NO_APP
-; BDVER3-NEXT: retq
-;
-; BDVER4-LABEL: test_vpshl:
-; BDVER4: # %bb.0:
-; BDVER4-NEXT: #APP
-; BDVER4-NEXT: vpshlb %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpshld %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpshlq %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpshlw %xmm1, %xmm0, %xmm0
-; BDVER4-NEXT: vpshlb (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vpshld (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vpshlq (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vpshlw (%rdi), %xmm0, %xmm0
-; BDVER4-NEXT: vpshlb %xmm0, (%rdi), %xmm0
-; BDVER4-NEXT: vpshld %xmm0, (%rdi), %xmm0
-; BDVER4-NEXT: vpshlq %xmm0, (%rdi), %xmm0
-; BDVER4-NEXT: vpshlw %xmm0, (%rdi), %xmm0
-; BDVER4-NEXT: #NO_APP
-; BDVER4-NEXT: retq
- call void asm sideeffect "vpshlb $1, $0, $0 \0A\09 vpshld $1, $0, $0 \0A\09 vpshlq $1, $0, $0 \0A\09 vpshlw $1, $0, $0 \0A\09 vpshlb $2, $0, $0 \0A\09 vpshld $2, $0, $0 \0A\09 vpshlq $2, $0, $0 \0A\09 vpshlw $2, $0, $0 \0A\09 vpshlb $0, $2, $0 \0A\09 vpshld $0, $2, $0 \0A\09 vpshlq $0, $2, $0 \0A\09 vpshlw $0, $2, $0", "x,x,*m"(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
- ret void
-}
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
index 55903728f9d..d9976cfb155 100644
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -191,8 +191,7 @@ public:
// Implementation of the llvm::MCStreamer interface. We only care about
// instructions.
void EmitInstruction(const llvm::MCInst &Instruction,
- const llvm::MCSubtargetInfo &STI,
- bool PrintSchedInfo) override {
+ const llvm::MCSubtargetInfo &STI) override {
Result->Instructions.push_back(Instruction);
}
diff --git a/llvm/tools/llvm-mca/CodeRegionGenerator.cpp b/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
index 3e7ea43e5f6..4660af2c40a 100644
--- a/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
+++ b/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
@@ -48,8 +48,7 @@ public:
// We only want to intercept the emission of new instructions.
virtual void EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo & /* unused */,
- bool /* unused */) override {
+ const MCSubtargetInfo &/* unused */) override {
Regions.addInstruction(Inst);
}
OpenPOWER on IntegriCloud