summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r--llvm/tools/llvm-exegesis/lib/CodeTemplate.cpp50
-rw-r--r--llvm/tools/llvm-exegesis/lib/CodeTemplate.h60
-rw-r--r--llvm/tools/llvm-exegesis/lib/Latency.cpp155
-rw-r--r--llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp16
-rw-r--r--llvm/tools/llvm-exegesis/lib/MCInstrDescView.h8
-rw-r--r--llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp2
-rw-r--r--llvm/tools/llvm-exegesis/lib/SnippetGenerator.h2
-rw-r--r--llvm/tools/llvm-exegesis/lib/Uops.cpp8
-rw-r--r--llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp237
9 files changed, 426 insertions, 112 deletions
diff --git a/llvm/tools/llvm-exegesis/lib/CodeTemplate.cpp b/llvm/tools/llvm-exegesis/lib/CodeTemplate.cpp
index 34433daa231..df9d18b94bb 100644
--- a/llvm/tools/llvm-exegesis/lib/CodeTemplate.cpp
+++ b/llvm/tools/llvm-exegesis/lib/CodeTemplate.cpp
@@ -65,4 +65,54 @@ llvm::MCInst InstructionTemplate::build() const {
return Result;
}
+// Returns true when Execution holds exactly one flag, i.e. when its raw value
+// is a power of two.
+bool isEnumValue(ExecutionMode Execution) {
+ const auto RawBits = static_cast<uint32_t>(Execution);
+ return llvm::isPowerOf2_32(RawBits);
+}
+
+// Returns a human readable name for a single ExecutionMode value.
+// Bit must be either ExecutionMode::UNKNOWN or exactly one flag; combinations
+// of flags are rejected by the assertion.
+llvm::StringRef getName(ExecutionMode Bit) {
+ // isEnumValue() only accepts powers of two, which excludes UNKNOWN (zero).
+ // UNKNOWN is therefore accepted explicitly, otherwise its case below could
+ // never be reached in assertion-enabled builds.
+ assert((Bit == ExecutionMode::UNKNOWN || isEnumValue(Bit)) &&
+ "Bit must be UNKNOWN or a power of two");
+ switch (Bit) {
+ case ExecutionMode::UNKNOWN:
+ return "UNKNOWN";
+ case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS:
+ return "ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS";
+ case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS:
+ return "ALWAYS_SERIAL_TIED_REGS_ALIAS";
+ case ExecutionMode::SERIAL_VIA_MEMORY_INSTR:
+ return "SERIAL_VIA_MEMORY_INSTR";
+ case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS:
+ return "SERIAL_VIA_EXPLICIT_REGS";
+ case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR:
+ return "SERIAL_VIA_NON_MEMORY_INSTR";
+ case ExecutionMode::ALWAYS_PARALLEL_MISSING_USE_OR_DEF:
+ return "ALWAYS_PARALLEL_MISSING_USE_OR_DEF";
+ case ExecutionMode::PARALLEL_VIA_EXPLICIT_REGS:
+ return "PARALLEL_VIA_EXPLICIT_REGS";
+ }
+ // Only reached if a new enumerator is added without a matching case above.
+ llvm_unreachable("Missing enum case");
+}
+
+// Table of the individual ExecutionMode flags, one entry per flag.
+// ExecutionMode::UNKNOWN is not a flag and is not listed.
+static const ExecutionMode kAllExecutionModeBits[] = {
+ ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS,
+ ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS,
+ ExecutionMode::SERIAL_VIA_MEMORY_INSTR,
+ ExecutionMode::SERIAL_VIA_EXPLICIT_REGS,
+ ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR,
+ ExecutionMode::ALWAYS_PARALLEL_MISSING_USE_OR_DEF,
+ ExecutionMode::PARALLEL_VIA_EXPLICIT_REGS,
+};
+
+// Exposes the table of individual ExecutionMode flags as a non-owning view.
+llvm::ArrayRef<ExecutionMode> getAllExecutionBits() {
+ return llvm::ArrayRef<ExecutionMode>(kAllExecutionModeBits);
+}
+
+// Splits Execution into the individual flags it contains, in the order given
+// by getAllExecutionBits().
+llvm::SmallVector<ExecutionMode, 4>
+getExecutionModeBits(ExecutionMode Execution) {
+ llvm::SmallVector<ExecutionMode, 4> SetBits;
+ for (const ExecutionMode Candidate : getAllExecutionBits()) {
+ const bool IsSet = (Execution & Candidate) == Candidate;
+ if (IsSet)
+ SetBits.push_back(Candidate);
+ }
+ return SetBits;
+}
+
} // namespace exegesis
diff --git a/llvm/tools/llvm-exegesis/lib/CodeTemplate.h b/llvm/tools/llvm-exegesis/lib/CodeTemplate.h
index e5006eb74c9..734992f0afa 100644
--- a/llvm/tools/llvm-exegesis/lib/CodeTemplate.h
+++ b/llvm/tools/llvm-exegesis/lib/CodeTemplate.h
@@ -17,6 +17,7 @@
#define LLVM_TOOLS_LLVM_EXEGESIS_CODETEMPLATE_H
#include "MCInstrDescView.h"
+#include "llvm/ADT/BitmaskEnum.h"
namespace exegesis {
@@ -45,9 +46,65 @@ struct InstructionTemplate {
llvm::SmallVector<llvm::MCOperand, 4> VariableValues;
};
+// Bitmask describing how repeated execution of an instruction can be made
+// serial or parallel. Every enumerator other than UNKNOWN occupies a distinct
+// bit, so values can be combined with the bitwise operators enabled through
+// LLVM_MARK_AS_BITMASK_ENUM.
+enum class ExecutionMode : uint8_t {
+ UNKNOWN = 0U,
+ // The instruction is always serial because implicit Use and Def alias.
+ // e.g. AAA (alias via EFLAGS)
+ ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS = 1u << 0,
+
+ // The instruction is always serial because one Def is tied to a Use.
+ // e.g. AND32ri (alias via tied GR32)
+ ALWAYS_SERIAL_TIED_REGS_ALIAS = 1u << 1,
+
+ // The execution can be made serial by inserting a second instruction that
+ // clobbers/reads memory.
+ // e.g. MOV8rm
+ SERIAL_VIA_MEMORY_INSTR = 1u << 2,
+
+ // The execution can be made serial by picking one Def that aliases with one
+ // Use.
+ // e.g. VXORPSrr XMM1, XMM1, XMM2
+ SERIAL_VIA_EXPLICIT_REGS = 1u << 3,
+
+ // The execution can be made serial by inserting a second instruction that
+ // uses one of the Defs and defs one of the Uses.
+ // e.g.
+ // 1st instruction: MMX_PMOVMSKBrr ECX, MM7
+ // 2nd instruction: MMX_MOVD64rr MM7, ECX
+ // or instruction: MMX_MOVD64to64rr MM7, ECX
+ // or instruction: MMX_PINSRWrr MM7, MM7, ECX, 1
+ SERIAL_VIA_NON_MEMORY_INSTR = 1u << 4,
+
+ // The execution is always parallel because the instruction is missing Use or
+ // Def operands.
+ ALWAYS_PARALLEL_MISSING_USE_OR_DEF = 1u << 5,
+
+ // The execution can be made parallel by repeating the same instruction but
+ // making sure that Defs of one instruction do not alias with Uses of the
+ // second one.
+ PARALLEL_VIA_EXPLICIT_REGS = 1u << 6,
+
+ // Must name the largest enumerator so the bitmask operators know which bits
+ // are valid.
+ LLVM_MARK_AS_BITMASK_ENUM(/*Largest*/ PARALLEL_VIA_EXPLICIT_REGS)
+};
+
+// Returns whether Execution is exactly one of the flags defined in the enum
+// above, i.e. a single power of two. Combinations of flags are not enum
+// values.
+// NOTE(review): UNKNOWN (0) is expected to yield false because zero is not a
+// power of two - confirm against llvm::isPowerOf2_32.
+bool isEnumValue(ExecutionMode Execution);
+
+// Returns a human readable string for a single enum value (not a combination
+// of flags).
+llvm::StringRef getName(ExecutionMode Execution);
+
+// Returns a sequence of increasing powers of two corresponding to all the
+// Execution flags. UNKNOWN is not part of the sequence.
+llvm::ArrayRef<ExecutionMode> getAllExecutionBits();
+
+// Decomposes Execution into individual set bits, reported in the order of
+// getAllExecutionBits().
+llvm::SmallVector<ExecutionMode, 4> getExecutionModeBits(ExecutionMode);
+
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+
// A CodeTemplate is a set of InstructionTemplates that may not be fully
// specified (i.e. some variables are not yet set). This allows the
-// BenchmarkRunner to instantiate it many times with specific values to study
+// SnippetGenerator to instantiate it many times with specific values to study
// their impact on instruction's performance.
struct CodeTemplate {
CodeTemplate() = default;
@@ -57,6 +114,7 @@ struct CodeTemplate {
CodeTemplate(const CodeTemplate &) = delete;
CodeTemplate &operator=(const CodeTemplate &) = delete;
+ ExecutionMode Execution = ExecutionMode::UNKNOWN;
// Some information about how this template has been created.
std::string Info;
// The list of the instructions for this template.
diff --git a/llvm/tools/llvm-exegesis/lib/Latency.cpp b/llvm/tools/llvm-exegesis/lib/Latency.cpp
index 040b42b53e2..7b991a452aa 100644
--- a/llvm/tools/llvm-exegesis/lib/Latency.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Latency.cpp
@@ -20,53 +20,148 @@
namespace exegesis {
-LatencySnippetGenerator::~LatencySnippetGenerator() = default;
+// Groups the execution modes that share one description.
+struct ExecutionClass {
+ ExecutionMode Mask;
+ const char *Description;
+};
+
+// Execution classes, in the order in which they are tried when generating
+// latency code templates.
+static const ExecutionClass kExecutionClasses[] = {
+ {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS |
+ ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS,
+ "Repeating a single implicitly serial instruction"},
+ {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS,
+ "Repeating a single explicitly serial instruction"},
+ {ExecutionMode::SERIAL_VIA_MEMORY_INSTR |
+ ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR,
+ "Repeating two instructions"},
+};
-llvm::Expected<std::vector<CodeTemplate>>
-generateTwoInstructionPrototypes(const LLVMState &State,
- const Instruction &Instr) {
+// Limit passed to computeAliasingInstructions() when building two-instruction
+// templates; collection stops once this many aliasing instructions are found.
+static constexpr size_t kMaxAliasingInstructions = 10;
+
+// Collects instructions other than Instr that have no memory operands and for
+// which Instr.hasAliasingRegistersThrough() holds. Opcodes are visited in a
+// random order drawn from randomGenerator(), and collection stops as soon as
+// the result holds MaxAliasingInstructions entries.
+static std::vector<Instruction>
+computeAliasingInstructions(const LLVMState &State, const Instruction &Instr,
+ size_t MaxAliasingInstructions) {
+ // Randomly iterate the set of instructions.
std::vector<unsigned> Opcodes;
Opcodes.resize(State.getInstrInfo().getNumOpcodes());
std::iota(Opcodes.begin(), Opcodes.end(), 0U);
std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator());
+
+ std::vector<Instruction> AliasingInstructions;
for (const unsigned OtherOpcode : Opcodes) {
- if (OtherOpcode == Instr.Description->Opcode)
+ if (OtherOpcode == Instr.Description->getOpcode())
continue;
const Instruction OtherInstr(State, OtherOpcode);
if (OtherInstr.hasMemoryOperands())
continue;
- const AliasingConfigurations Forward(Instr, OtherInstr);
- const AliasingConfigurations Back(OtherInstr, Instr);
- if (Forward.empty() || Back.empty())
- continue;
- InstructionTemplate ThisIT(Instr);
- InstructionTemplate OtherIT(OtherInstr);
- if (!Forward.hasImplicitAliasing())
- setRandomAliasing(Forward, ThisIT, OtherIT);
- if (!Back.hasImplicitAliasing())
- setRandomAliasing(Back, OtherIT, ThisIT);
+ if (Instr.hasAliasingRegistersThrough(OtherInstr))
+ // NOTE(review): OtherInstr is declared const, so this std::move still
+ // copies; drop the const or the std::move.
+ AliasingInstructions.push_back(std::move(OtherInstr));
+ // The limit is checked after every candidate, whether or not it was kept.
+ if (AliasingInstructions.size() >= MaxAliasingInstructions)
+ break;
+ }
+ return AliasingInstructions;
+}
+
+// Computes the set of ExecutionMode flags describing how Instr can be made to
+// execute serially. Returns ExecutionMode::UNKNOWN when no flag applies.
+static ExecutionMode getExecutionModes(const Instruction &Instr) {
+ // Start from an explicit empty mask: every |= below reads EM, so leaving it
+ // uninitialized would be undefined behaviour.
+ ExecutionMode EM = ExecutionMode::UNKNOWN;
+ if (Instr.hasAliasingImplicitRegisters())
+ EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS;
+ if (Instr.hasTiedRegisters())
+ EM |= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS;
+ if (Instr.hasMemoryOperands())
+ EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR;
+ else {
+ // The register-based strategies are only considered for instructions
+ // without memory operands.
+ if (Instr.hasAliasingRegisters())
+ EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
+ if (Instr.hasOneUseOrOneDef())
+ EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
+ }
+ return EM;
+}
+
+// Appends to CodeTemplates the templates implementing the single execution
+// mode ExecutionModeBit for Instr. Each appended template records
+// ExecutionModeBit in Execution and ExecutionClassDescription in Info.
+// Nothing is appended for SERIAL_VIA_MEMORY_INSTR, which is not implemented
+// yet. Any mode other than the serial ones handled below is unreachable.
+static void appendCodeTemplates(const LLVMState &State,
+ const Instruction &Instr,
+ ExecutionMode ExecutionModeBit,
+ llvm::StringRef ExecutionClassDescription,
+ std::vector<CodeTemplate> &CodeTemplates) {
+ assert(isEnumValue(ExecutionModeBit) && "Bit must be a power of two");
+ switch (ExecutionModeBit) {
+ case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS:
+ // Nothing to do, the instruction is always serial.
+ LLVM_FALLTHROUGH;
+ case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: {
+ // Picking whatever value for the tied variable will make the instruction
+ // serial.
CodeTemplate CT;
- CT.Info = llvm::formatv("creating cycle through {0}.",
- State.getInstrInfo().getName(OtherOpcode));
- CT.Instructions.push_back(std::move(ThisIT));
- CT.Instructions.push_back(std::move(OtherIT));
- return getSingleton(CT);
+ CT.Execution = ExecutionModeBit;
+ CT.Info = ExecutionClassDescription;
+ CT.Instructions.push_back(Instr);
+ CodeTemplates.push_back(std::move(CT));
+ return;
+ }
+ case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: {
+ // Select back-to-back memory instruction.
+ // TODO: Implement me.
+ return;
+ }
+ case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: {
+ // Making the execution of this instruction serial by selecting one def
+ // register to alias with one use register.
+ const AliasingConfigurations SelfAliasing(Instr, Instr);
+ assert(!SelfAliasing.empty() && !SelfAliasing.hasImplicitAliasing() &&
+ "Instr must alias itself explicitly");
+ InstructionTemplate IT(Instr);
+ // This is a self aliasing instruction so defs and uses are from the same
+ // instance, hence twice IT in the following call.
+ setRandomAliasing(SelfAliasing, IT, IT);
+ CodeTemplate CT;
+ CT.Execution = ExecutionModeBit;
+ CT.Info = ExecutionClassDescription;
+ CT.Instructions.push_back(std::move(IT));
+ CodeTemplates.push_back(std::move(CT));
+ return;
+ }
+ case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: {
+ // Select back-to-back non-memory instruction.
+ // Bind by const reference to avoid copying every Instruction; the temporary
+ // vector returned by computeAliasingInstructions() lives for the whole loop.
+ for (const auto &OtherInstr :
+ computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) {
+ const AliasingConfigurations Forward(Instr, OtherInstr);
+ const AliasingConfigurations Back(OtherInstr, Instr);
+ InstructionTemplate ThisIT(Instr);
+ InstructionTemplate OtherIT(OtherInstr);
+ // Registers only need to be picked when the aliasing is not already
+ // implied by implicit operands.
+ if (!Forward.hasImplicitAliasing())
+ setRandomAliasing(Forward, ThisIT, OtherIT);
+ if (!Back.hasImplicitAliasing())
+ setRandomAliasing(Back, OtherIT, ThisIT);
+ CodeTemplate CT;
+ CT.Execution = ExecutionModeBit;
+ CT.Info = ExecutionClassDescription;
+ CT.Instructions.push_back(std::move(ThisIT));
+ CT.Instructions.push_back(std::move(OtherIT));
+ CodeTemplates.push_back(std::move(CT));
+ }
+ return;
+ }
+ default:
+ llvm_unreachable("Unhandled enum value");
}
- return llvm::make_error<BenchmarkFailure>(
- "Infeasible : Didn't find any scheme to make the instruction serial");
}
+LatencySnippetGenerator::~LatencySnippetGenerator() = default;
+
+// Generates the code templates used to measure the latency of Instr.
+// The execution classes of kExecutionClasses are tried in order; the templates
+// of the first class that yields any are returned. Returns a BenchmarkFailure
+// error when no class yields a template.
llvm::Expected<std::vector<CodeTemplate>>
LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
- if (Instr.hasMemoryOperands())
+ std::vector<CodeTemplate> Results;
+ const ExecutionMode EM = getExecutionModes(Instr);
+ for (const auto EC : kExecutionClasses) {
+ // Only the modes of this class that Instr actually supports are expanded.
+ for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask))
+ appendCodeTemplates(State, Instr, ExecutionModeBit, EC.Description,
+ Results);
+ // Stop at the first class that produced at least one template.
+ if (!Results.empty())
+ break;
+ }
+ if (Results.empty())
return llvm::make_error<BenchmarkFailure>(
- "Infeasible : has memory operands");
- return llvm::handleExpected( //
- generateSelfAliasingCodeTemplates(Instr),
- [this, &Instr]() {
- return generateTwoInstructionPrototypes(State, Instr);
- },
- [](const BenchmarkFailure &) { /*Consume Error*/ });
+ "No strategy found to make the execution serial");
+ return std::move(Results);
}
const char *LatencyBenchmarkRunner::getCounterName() const {
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
index fa9378856f4..59f56520efc 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
@@ -27,7 +27,14 @@ unsigned Variable::getPrimaryOperandIndex() const {
return TiedOperands[0];
}
-bool Variable::hasTiedOperands() const { return TiedOperands.size() > 1; }
+// Returns whether this variable covers more than one operand, i.e. whether a
+// Def operand and a Use operand are tied together.
+bool Variable::hasTiedOperands() const {
+ // By definition only Use and Def operands can be tied together.
+ // TiedOperands[0] is the Def operand (LLVM stores defs first).
+ // TiedOperands[1] is the Use operand.
+ const auto NumTiedOperands = TiedOperands.size();
+ assert(NumTiedOperands <= 2 &&
+ "No more than two operands can be tied together");
+ return NumTiedOperands > 1;
+}
unsigned Operand::getIndex() const {
assert(Index >= 0 && "Index must be set");
@@ -197,6 +204,10 @@ bool Instruction::hasAliasingRegisters() const {
return AllDefRegs.anyCommon(AllUseRegs);
}
+// Returns whether this instruction has at least one register def or at least
+// one register use.
+bool Instruction::hasOneUseOrOneDef() const {
+ // any() stops at the first set bit, whereas count() always scans the whole
+ // bit vector; only emptiness matters here.
+ return AllDefRegs.any() || AllUseRegs.any();
+}
+
void Instruction::dump(const llvm::MCRegisterInfo &RegInfo,
llvm::raw_ostream &Stream) const {
Stream << "- " << Name << "\n";
@@ -288,8 +299,7 @@ bool AliasingConfigurations::hasImplicitAliasing() const {
}
AliasingConfigurations::AliasingConfigurations(
- const Instruction &DefInstruction, const Instruction &UseInstruction)
- : DefInstruction(DefInstruction), UseInstruction(UseInstruction) {
+ const Instruction &DefInstruction, const Instruction &UseInstruction) {
if (UseInstruction.AllUseRegs.anyCommon(DefInstruction.AllDefRegs)) {
auto CommonRegisters = UseInstruction.AllUseRegs;
CommonRegisters &= DefInstruction.AllDefRegs;
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
index 6910538a31f..17f3e2b930d 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
@@ -125,6 +125,11 @@ struct Instruction {
// reads or write the same memory region.
bool hasMemoryOperands() const;
+ // Returns whether this instruction has at least one use or one def.
+ // Repeating this instruction may execute sequentially by adding an
+ // instruction that aliases one of these.
+ bool hasOneUseOrOneDef() const;
+
// Convenient function to help with debugging.
void dump(const llvm::MCRegisterInfo &RegInfo,
llvm::raw_ostream &Stream) const;
@@ -174,10 +179,7 @@ struct AliasingConfigurations {
bool empty() const; // True if no aliasing configuration is found.
bool hasImplicitAliasing() const;
- void setExplicitAliasing() const;
- const Instruction &DefInstruction;
- const Instruction &UseInstruction;
llvm::SmallVector<AliasingRegisterOperands, 32> Configurations;
};
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
index feee61d113c..cdf54a32e4f 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
@@ -22,7 +22,7 @@
namespace exegesis {
-std::vector<CodeTemplate> getSingleton(CodeTemplate &CT) {
+std::vector<CodeTemplate> getSingleton(CodeTemplate &&CT) {
std::vector<CodeTemplate> Result;
Result.push_back(std::move(CT));
return Result;
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
index e48cf0cfeb0..4b307fd75ac 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
@@ -30,7 +30,7 @@
namespace exegesis {
-std::vector<CodeTemplate> getSingleton(CodeTemplate &CT);
+std::vector<CodeTemplate> getSingleton(CodeTemplate &&CT);
// Generates code templates that has a self-dependency.
llvm::Expected<std::vector<CodeTemplate>>
diff --git a/llvm/tools/llvm-exegesis/lib/Uops.cpp b/llvm/tools/llvm-exegesis/lib/Uops.cpp
index a3ada77ef8c..d8065adbdb2 100644
--- a/llvm/tools/llvm-exegesis/lib/Uops.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Uops.cpp
@@ -153,13 +153,13 @@ UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
CT.Info = "instruction is parallel, repeating a random one.";
CT.Instructions.push_back(std::move(IT));
instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
- return getSingleton(CT);
+ return getSingleton(std::move(CT));
}
if (SelfAliasing.hasImplicitAliasing()) {
CT.Info = "instruction is serial, repeating a random one.";
CT.Instructions.push_back(std::move(IT));
instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
- return getSingleton(CT);
+ return getSingleton(std::move(CT));
}
const auto TiedVariables = getVariablesWithTiedOperands(Instr);
if (!TiedVariables.empty()) {
@@ -181,7 +181,7 @@ UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
CT.Instructions.push_back(std::move(TmpIT));
}
instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
- return getSingleton(CT);
+ return getSingleton(std::move(CT));
}
const auto &ReservedRegisters = State.getRATC().reservedRegisters();
// No tied variables, we pick random values for defs.
@@ -218,7 +218,7 @@ UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
"instruction has no tied variables picking Uses different from defs";
CT.Instructions.push_back(std::move(IT));
instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
- return getSingleton(CT);
+ return getSingleton(std::move(CT));
}
std::vector<BenchmarkMeasure>
diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp
index 6cc24a02cfc..4b3fa5455a3 100644
--- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp
+++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetGeneratorTest.cpp
@@ -25,6 +25,7 @@ namespace {
using testing::AnyOf;
using testing::ElementsAre;
+using testing::Gt;
using testing::HasSubstr;
using testing::Not;
using testing::SizeIs;
@@ -57,14 +58,12 @@ class SnippetGeneratorTest : public X86SnippetGeneratorTest {
protected:
SnippetGeneratorTest() : Generator(State) {}
- CodeTemplate checkAndGetCodeTemplate(unsigned Opcode) {
+ std::vector<CodeTemplate> checkAndGetCodeTemplates(unsigned Opcode) {
randomGenerator().seed(0); // Initialize seed.
const Instruction Instr(State, Opcode);
auto CodeTemplateOrError = Generator.generateCodeTemplates(Instr);
EXPECT_FALSE(CodeTemplateOrError.takeError()); // Valid configuration.
- auto &CodeTemplate = CodeTemplateOrError.get();
- EXPECT_EQ(CodeTemplate.size(), 1U);
- return std::move(CodeTemplate.front());
+ return std::move(CodeTemplateOrError.get());
}
SnippetGeneratorT Generator;
@@ -75,21 +74,25 @@ using LatencySnippetGeneratorTest =
using UopsSnippetGeneratorTest = SnippetGeneratorTest<UopsSnippetGenerator>;
-TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependency) {
- // ADC16i16 self alias because of implicit use and def.
-
- // explicit use 0 : imm
- // implicit def : AX
- // implicit def : EFLAGS
- // implicit use : AX
- // implicit use : EFLAGS
+TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughImplicitReg) {
+ // - ADC16i16
+ // - Op0 Explicit Use Immediate
+ // - Op1 Implicit Def Reg(AX)
+ // - Op2 Implicit Def Reg(EFLAGS)
+ // - Op3 Implicit Use Reg(AX)
+ // - Op4 Implicit Use Reg(EFLAGS)
+ // - Var0 [Op0]
+ // - hasAliasingImplicitRegisters (execution is always serial)
+ // - hasAliasingRegisters
const unsigned Opcode = llvm::X86::ADC16i16;
EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitDefs()[0], llvm::X86::AX);
EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitDefs()[1], llvm::X86::EFLAGS);
EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitUses()[0], llvm::X86::AX);
EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitUses()[1], llvm::X86::EFLAGS);
- const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
- EXPECT_THAT(CT.Info, HasSubstr("implicit"));
+ const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
+ ASSERT_THAT(CodeTemplates, SizeIs(1));
+ const auto &CT = CodeTemplates[0];
+ EXPECT_THAT(CT.Execution, ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS);
ASSERT_THAT(CT.Instructions, SizeIs(1));
const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode);
@@ -97,63 +100,105 @@ TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependency) {
EXPECT_THAT(IT.VariableValues[0], IsInvalid()) << "Immediate is not set";
}
-TEST_F(LatencySnippetGeneratorTest, ExplicitSelfDependency) {
- // ADD16ri self alias because Op0 and Op1 are tied together.
-
- // explicit def 0 : reg RegClass=GR16
- // explicit use 1 : reg RegClass=GR16 | TIED_TO:0
- // explicit use 2 : imm
- // implicit def : EFLAGS
+TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughTiedRegs) {
+ // - ADD16ri
+ // - Op0 Explicit Def RegClass(GR16)
+ // - Op1 Explicit Use RegClass(GR16) TiedToOp0
+ // - Op2 Explicit Use Immediate
+ // - Op3 Implicit Def Reg(EFLAGS)
+ // - Var0 [Op0,Op1]
+ // - Var1 [Op2]
+ // - hasTiedRegisters (execution is always serial)
+ // - hasAliasingRegisters
const unsigned Opcode = llvm::X86::ADD16ri;
EXPECT_THAT(MCInstrInfo.get(Opcode).getImplicitDefs()[0], llvm::X86::EFLAGS);
- const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
- EXPECT_THAT(CT.Info, HasSubstr("explicit"));
+ const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
+ ASSERT_THAT(CodeTemplates, SizeIs(1));
+ const auto &CT = CodeTemplates[0];
+ EXPECT_THAT(CT.Execution, ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS);
ASSERT_THAT(CT.Instructions, SizeIs(1));
const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode);
ASSERT_THAT(IT.VariableValues, SizeIs(2));
- EXPECT_THAT(IT.VariableValues[0], IsReg()) << "Operand 0 and 1";
+ EXPECT_THAT(IT.VariableValues[0], IsInvalid()) << "Operand 1 is not set";
EXPECT_THAT(IT.VariableValues[1], IsInvalid()) << "Operand 2 is not set";
}
-TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) {
- // CMP64rr
- // explicit use 0 : reg RegClass=GR64
- // explicit use 1 : reg RegClass=GR64
- // implicit def : EFLAGS
-
- const unsigned Opcode = llvm::X86::CMP64rr;
- const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
- EXPECT_THAT(CT.Info, HasSubstr("cycle through"));
- ASSERT_THAT(CT.Instructions, SizeIs(2));
+TEST_F(LatencySnippetGeneratorTest, ImplicitSelfDependencyThroughExplicitRegs) {
+ // - VXORPSrr
+ // - Op0 Explicit Def RegClass(VR128)
+ // - Op1 Explicit Use RegClass(VR128)
+ // - Op2 Explicit Use RegClass(VR128)
+ // - Var0 [Op0]
+ // - Var1 [Op1]
+ // - Var2 [Op2]
+ // - hasAliasingRegisters
+ const unsigned Opcode = llvm::X86::VXORPSrr;
+ const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
+ ASSERT_THAT(CodeTemplates, SizeIs(1));
+ const auto &CT = CodeTemplates[0];
+ EXPECT_THAT(CT.Execution, ExecutionMode::SERIAL_VIA_EXPLICIT_REGS);
+ ASSERT_THAT(CT.Instructions, SizeIs(1));
const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode);
- ASSERT_THAT(IT.VariableValues, SizeIs(2));
- EXPECT_THAT(IT.VariableValues, AnyOf(ElementsAre(IsReg(), IsInvalid()),
- ElementsAre(IsInvalid(), IsReg())));
- EXPECT_THAT(CT.Instructions[1].getOpcode(), Not(Opcode));
- // TODO: check that the two instructions alias each other.
+ ASSERT_THAT(IT.VariableValues, SizeIs(3));
+ EXPECT_THAT(IT.VariableValues,
+ AnyOf(ElementsAre(IsReg(), IsInvalid(), IsReg()),
+ ElementsAre(IsReg(), IsReg(), IsInvalid())))
+ << "Op0 is either set to Op1 or to Op2";
+}
+
+TEST_F(LatencySnippetGeneratorTest, DependencyThroughOtherOpcode) {
+ // - CMP64rr
+ // - Op0 Explicit Use RegClass(GR64)
+ // - Op1 Explicit Use RegClass(GR64)
+ // - Op2 Implicit Def Reg(EFLAGS)
+ // - Var0 [Op0]
+ // - Var1 [Op1]
+ const unsigned Opcode = llvm::X86::CMP64rr;
+ const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
+ ASSERT_THAT(CodeTemplates, SizeIs(Gt(1U))) << "Many templates are available";
+ for (const auto &CT : CodeTemplates) {
+ EXPECT_THAT(CT.Execution, ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR);
+ ASSERT_THAT(CT.Instructions, SizeIs(2));
+ const InstructionTemplate &IT = CT.Instructions[0];
+ EXPECT_THAT(IT.getOpcode(), Opcode);
+ ASSERT_THAT(IT.VariableValues, SizeIs(2));
+ EXPECT_THAT(IT.VariableValues, AnyOf(ElementsAre(IsReg(), IsInvalid()),
+ ElementsAre(IsInvalid(), IsReg())));
+ EXPECT_THAT(CT.Instructions[1].getOpcode(), Not(Opcode));
+ // TODO: check that the two instructions alias each other.
+ }
}
TEST_F(LatencySnippetGeneratorTest, LAHF) {
+ // - LAHF
+ // - Op0 Implicit Def Reg(AH)
+ // - Op1 Implicit Use Reg(EFLAGS)
const unsigned Opcode = llvm::X86::LAHF;
- const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
- EXPECT_THAT(CT.Info, HasSubstr("cycle through"));
- ASSERT_THAT(CT.Instructions, SizeIs(2));
- const InstructionTemplate &IT = CT.Instructions[0];
- EXPECT_THAT(IT.getOpcode(), Opcode);
- ASSERT_THAT(IT.VariableValues, SizeIs(0));
+ const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
+ ASSERT_THAT(CodeTemplates, SizeIs(Gt(1U))) << "Many templates are available";
+ for (const auto &CT : CodeTemplates) {
+ EXPECT_THAT(CT.Execution, ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR);
+ ASSERT_THAT(CT.Instructions, SizeIs(2));
+ const InstructionTemplate &IT = CT.Instructions[0];
+ EXPECT_THAT(IT.getOpcode(), Opcode);
+ ASSERT_THAT(IT.VariableValues, SizeIs(0));
+ }
}
TEST_F(UopsSnippetGeneratorTest, ParallelInstruction) {
- // BNDCL32rr is parallel no matter what.
-
- // explicit use 0 : reg RegClass=BNDR
- // explicit use 1 : reg RegClass=GR32
-
+ // - BNDCL32rr
+ // - Op0 Explicit Use RegClass(BNDR)
+ // - Op1 Explicit Use RegClass(GR32)
+ // - Var0 [Op0]
+ // - Var1 [Op1]
const unsigned Opcode = llvm::X86::BNDCL32rr;
- const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
+ const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
+ ASSERT_THAT(CodeTemplates, SizeIs(1));
+ const auto &CT = CodeTemplates[0];
EXPECT_THAT(CT.Info, HasSubstr("parallel"));
+ EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN);
ASSERT_THAT(CT.Instructions, SizeIs(1));
const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode);
@@ -163,14 +208,18 @@ TEST_F(UopsSnippetGeneratorTest, ParallelInstruction) {
}
TEST_F(UopsSnippetGeneratorTest, SerialInstruction) {
- // CDQ is serial no matter what.
-
- // implicit def : EAX
- // implicit def : EDX
- // implicit use : EAX
+ // - CDQ
+ // - Op0 Implicit Def Reg(EAX)
+ // - Op1 Implicit Def Reg(EDX)
+ // - Op2 Implicit Use Reg(EAX)
+ // - hasAliasingImplicitRegisters (execution is always serial)
+ // - hasAliasingRegisters
const unsigned Opcode = llvm::X86::CDQ;
- const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
+ const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
+ ASSERT_THAT(CodeTemplates, SizeIs(1));
+ const auto &CT = CodeTemplates[0];
EXPECT_THAT(CT.Info, HasSubstr("serial"));
+ EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN);
ASSERT_THAT(CT.Instructions, SizeIs(1));
const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode);
@@ -181,13 +230,21 @@ TEST_F(UopsSnippetGeneratorTest, StaticRenaming) {
// CMOVA32rr has tied variables, we enumerate the possible values to execute
// as many in parallel as possible.
- // explicit def 0 : reg RegClass=GR32
- // explicit use 1 : reg RegClass=GR32 | TIED_TO:0
- // explicit use 2 : reg RegClass=GR32
- // implicit use : EFLAGS
+ // - CMOVA32rr
+ // - Op0 Explicit Def RegClass(GR32)
+ // - Op1 Explicit Use RegClass(GR32) TiedToOp0
+ // - Op2 Explicit Use RegClass(GR32)
+ // - Op3 Implicit Use Reg(EFLAGS)
+ // - Var0 [Op0,Op1]
+ // - Var1 [Op2]
+ // - hasTiedRegisters (execution is always serial)
+ // - hasAliasingRegisters
const unsigned Opcode = llvm::X86::CMOVA32rr;
- const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
+ const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
+ ASSERT_THAT(CodeTemplates, SizeIs(1));
+ const auto &CT = CodeTemplates[0];
EXPECT_THAT(CT.Info, HasSubstr("static renaming"));
+ EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN);
constexpr const unsigned kInstructionCount = 15;
ASSERT_THAT(CT.Instructions, SizeIs(kInstructionCount));
std::unordered_set<unsigned> AllDefRegisters;
@@ -203,14 +260,23 @@ TEST_F(UopsSnippetGeneratorTest, NoTiedVariables) {
// CMOV_GR32 has no tied variables, we make sure def and use are different
// from each other.
- // explicit def 0 : reg RegClass=GR32
- // explicit use 1 : reg RegClass=GR32
- // explicit use 2 : reg RegClass=GR32
- // explicit use 3 : imm
- // implicit use : EFLAGS
+ // - CMOV_GR32
+ // - Op0 Explicit Def RegClass(GR32)
+ // - Op1 Explicit Use RegClass(GR32)
+ // - Op2 Explicit Use RegClass(GR32)
+ // - Op3 Explicit Use Immediate
+ // - Op4 Implicit Use Reg(EFLAGS)
+ // - Var0 [Op0]
+ // - Var1 [Op1]
+ // - Var2 [Op2]
+ // - Var3 [Op3]
+ // - hasAliasingRegisters
const unsigned Opcode = llvm::X86::CMOV_GR32;
- const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
+ const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
+ ASSERT_THAT(CodeTemplates, SizeIs(1));
+ const auto &CT = CodeTemplates[0];
EXPECT_THAT(CT.Info, HasSubstr("no tied variables"));
+ EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN);
ASSERT_THAT(CT.Instructions, SizeIs(1));
const InstructionTemplate &IT = CT.Instructions[0];
EXPECT_THAT(IT.getOpcode(), Opcode);
@@ -224,9 +290,27 @@ TEST_F(UopsSnippetGeneratorTest, NoTiedVariables) {
TEST_F(UopsSnippetGeneratorTest, MemoryUse) {
// Mov32rm reads from memory.
+ // - MOV32rm
+ // - Op0 Explicit Def RegClass(GR32)
+ // - Op1 Explicit Use Memory RegClass(GR8)
+ // - Op2 Explicit Use Memory
+ // - Op3 Explicit Use Memory RegClass(GRH8)
+ // - Op4 Explicit Use Memory
+ // - Op5 Explicit Use Memory RegClass(SEGMENT_REG)
+ // - Var0 [Op0]
+ // - Var1 [Op1]
+ // - Var2 [Op2]
+ // - Var3 [Op3]
+ // - Var4 [Op4]
+ // - Var5 [Op5]
+ // - hasMemoryOperands
+ // - hasAliasingRegisters
const unsigned Opcode = llvm::X86::MOV32rm;
- const CodeTemplate CT = checkAndGetCodeTemplate(Opcode);
+ const auto CodeTemplates = checkAndGetCodeTemplates(Opcode);
+ ASSERT_THAT(CodeTemplates, SizeIs(1));
+ const auto &CT = CodeTemplates[0];
EXPECT_THAT(CT.Info, HasSubstr("no tied variables"));
+ EXPECT_THAT(CT.Execution, ExecutionMode::UNKNOWN);
ASSERT_THAT(CT.Instructions,
SizeIs(UopsSnippetGenerator::kMinNumDifferentAddresses));
const InstructionTemplate &IT = CT.Instructions[0];
@@ -240,6 +324,21 @@ TEST_F(UopsSnippetGeneratorTest, MemoryUse) {
TEST_F(UopsSnippetGeneratorTest, MemoryUse_Movsb) {
// MOVSB writes to scratch memory register.
+ // - MOVSB
+ // - Op0 Explicit Use Memory RegClass(GR8)
+ // - Op1 Explicit Use Memory RegClass(GR8)
+ // - Op2 Explicit Use Memory RegClass(SEGMENT_REG)
+ // - Op3 Implicit Def Reg(EDI)
+ // - Op4 Implicit Def Reg(ESI)
+ // - Op5 Implicit Use Reg(EDI)
+ // - Op6 Implicit Use Reg(ESI)
+ // - Op7 Implicit Use Reg(DF)
+ // - Var0 [Op0]
+ // - Var1 [Op1]
+ // - Var2 [Op2]
+ // - hasMemoryOperands
+ // - hasAliasingImplicitRegisters (execution is always serial)
+ // - hasAliasingRegisters
const unsigned Opcode = llvm::X86::MOVSB;
const Instruction Instr(State, Opcode);
auto Error = Generator.generateCodeTemplates(Instr).takeError();
OpenPOWER on IntegriCloud