summaryrefslogtreecommitdiffstats
path: root/llvm/tools/llvm-exegesis/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/tools/llvm-exegesis/lib')
-rw-r--r--llvm/tools/llvm-exegesis/lib/CodeTemplate.cpp50
-rw-r--r--llvm/tools/llvm-exegesis/lib/CodeTemplate.h60
-rw-r--r--llvm/tools/llvm-exegesis/lib/Latency.cpp155
-rw-r--r--llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp16
-rw-r--r--llvm/tools/llvm-exegesis/lib/MCInstrDescView.h8
-rw-r--r--llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp2
-rw-r--r--llvm/tools/llvm-exegesis/lib/SnippetGenerator.h2
-rw-r--r--llvm/tools/llvm-exegesis/lib/Uops.cpp8
8 files changed, 258 insertions, 43 deletions
diff --git a/llvm/tools/llvm-exegesis/lib/CodeTemplate.cpp b/llvm/tools/llvm-exegesis/lib/CodeTemplate.cpp
index 34433daa231..df9d18b94bb 100644
--- a/llvm/tools/llvm-exegesis/lib/CodeTemplate.cpp
+++ b/llvm/tools/llvm-exegesis/lib/CodeTemplate.cpp
@@ -65,4 +65,54 @@ llvm::MCInst InstructionTemplate::build() const {
return Result;
}
+bool isEnumValue(ExecutionMode Execution) {
+ return llvm::isPowerOf2_32(static_cast<uint32_t>(Execution));
+}
+
+llvm::StringRef getName(ExecutionMode Bit) {
+ assert(isEnumValue(Bit) && "Bit must be a power of two");
+ switch (Bit) {
+ case ExecutionMode::UNKNOWN:
+ return "UNKNOWN";
+ case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS:
+ return "ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS";
+ case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS:
+ return "ALWAYS_SERIAL_TIED_REGS_ALIAS";
+ case ExecutionMode::SERIAL_VIA_MEMORY_INSTR:
+ return "SERIAL_VIA_MEMORY_INSTR";
+ case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS:
+ return "SERIAL_VIA_EXPLICIT_REGS";
+ case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR:
+ return "SERIAL_VIA_NON_MEMORY_INSTR";
+ case ExecutionMode::ALWAYS_PARALLEL_MISSING_USE_OR_DEF:
+ return "ALWAYS_PARALLEL_MISSING_USE_OR_DEF";
+ case ExecutionMode::PARALLEL_VIA_EXPLICIT_REGS:
+ return "PARALLEL_VIA_EXPLICIT_REGS";
+ }
+ llvm_unreachable("Missing enum case");
+}
+
+static const ExecutionMode kAllExecutionModeBits[] = {
+ ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS,
+ ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS,
+ ExecutionMode::SERIAL_VIA_MEMORY_INSTR,
+ ExecutionMode::SERIAL_VIA_EXPLICIT_REGS,
+ ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR,
+ ExecutionMode::ALWAYS_PARALLEL_MISSING_USE_OR_DEF,
+ ExecutionMode::PARALLEL_VIA_EXPLICIT_REGS,
+};
+
+llvm::ArrayRef<ExecutionMode> getAllExecutionBits() {
+ return kAllExecutionModeBits;
+}
+
+llvm::SmallVector<ExecutionMode, 4>
+getExecutionModeBits(ExecutionMode Execution) {
+ llvm::SmallVector<ExecutionMode, 4> Result;
+ for (const auto Bit : getAllExecutionBits())
+ if ((Execution & Bit) == Bit)
+ Result.push_back(Bit);
+ return Result;
+}
+
} // namespace exegesis
diff --git a/llvm/tools/llvm-exegesis/lib/CodeTemplate.h b/llvm/tools/llvm-exegesis/lib/CodeTemplate.h
index e5006eb74c9..734992f0afa 100644
--- a/llvm/tools/llvm-exegesis/lib/CodeTemplate.h
+++ b/llvm/tools/llvm-exegesis/lib/CodeTemplate.h
@@ -17,6 +17,7 @@
#define LLVM_TOOLS_LLVM_EXEGESIS_CODETEMPLATE_H
#include "MCInstrDescView.h"
+#include "llvm/ADT/BitmaskEnum.h"
namespace exegesis {
@@ -45,9 +46,65 @@ struct InstructionTemplate {
llvm::SmallVector<llvm::MCOperand, 4> VariableValues;
};
+enum class ExecutionMode : uint8_t {
+ UNKNOWN = 0U,
+ // The instruction is always serial because implicit Use and Def alias.
+ // e.g. AAA (alias via EFLAGS)
+ ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS = 1u << 0,
+
+ // The instruction is always serial because one Def is tied to a Use.
+ // e.g. AND32ri (alias via tied GR32)
+ ALWAYS_SERIAL_TIED_REGS_ALIAS = 1u << 1,
+
+ // The execution can be made serial by inserting a second instruction that
+ // clobbers/reads memory.
+ // e.g. MOV8rm
+ SERIAL_VIA_MEMORY_INSTR = 1u << 2,
+
+ // The execution can be made serial by picking one Def that aliases with one
+ // Use.
+ // e.g. VXORPSrr XMM1, XMM1, XMM2
+ SERIAL_VIA_EXPLICIT_REGS = 1u << 3,
+
+ // The execution can be made serial by inserting a second instruction that
+ // uses one of the Defs and defs one of the Uses.
+ // e.g.
+ // 1st instruction: MMX_PMOVMSKBrr ECX, MM7
+ // 2nd instruction: MMX_MOVD64rr MM7, ECX
+ // or instruction: MMX_MOVD64to64rr MM7, ECX
+ // or instruction: MMX_PINSRWrr MM7, MM7, ECX, 1
+ SERIAL_VIA_NON_MEMORY_INSTR = 1u << 4,
+
+ // The execution is always parallel because the instruction is missing Use or
+ // Def operands.
+ ALWAYS_PARALLEL_MISSING_USE_OR_DEF = 1u << 5,
+
+ // The execution can be made parallel by repeating the same instruction but
+ // making sure that Defs of one instruction do not alias with Uses of the
+ // second one.
+ PARALLEL_VIA_EXPLICIT_REGS = 1u << 6,
+
+ LLVM_MARK_AS_BITMASK_ENUM(/*Largest*/ PARALLEL_VIA_EXPLICIT_REGS)
+};
+
+// Returns whether Execution is one of the values defined in the enum above.
+bool isEnumValue(ExecutionMode Execution);
+
+// Returns a human readable string for the enum.
+llvm::StringRef getName(ExecutionMode Execution);
+
+// Returns a sequence of increasing powers of two corresponding to all the
+// Execution flags.
+llvm::ArrayRef<ExecutionMode> getAllExecutionBits();
+
+// Decomposes Execution into individual set bits.
+llvm::SmallVector<ExecutionMode, 4> getExecutionModeBits(ExecutionMode);
+
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+
// A CodeTemplate is a set of InstructionTemplates that may not be fully
// specified (i.e. some variables are not yet set). This allows the
-// BenchmarkRunner to instantiate it many times with specific values to study
+// SnippetGenerator to instantiate it many times with specific values to study
// their impact on instruction's performance.
struct CodeTemplate {
CodeTemplate() = default;
@@ -57,6 +114,7 @@ struct CodeTemplate {
CodeTemplate(const CodeTemplate &) = delete;
CodeTemplate &operator=(const CodeTemplate &) = delete;
+ ExecutionMode Execution = ExecutionMode::UNKNOWN;
// Some information about how this template has been created.
std::string Info;
// The list of the instructions for this template.
diff --git a/llvm/tools/llvm-exegesis/lib/Latency.cpp b/llvm/tools/llvm-exegesis/lib/Latency.cpp
index 040b42b53e2..7b991a452aa 100644
--- a/llvm/tools/llvm-exegesis/lib/Latency.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Latency.cpp
@@ -20,53 +20,148 @@
namespace exegesis {
-LatencySnippetGenerator::~LatencySnippetGenerator() = default;
+struct ExecutionClass {
+ ExecutionMode Mask;
+ const char *Description;
+} static const kExecutionClasses[] = {
+ {ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS |
+ ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS,
+ "Repeating a single implicitly serial instruction"},
+ {ExecutionMode::SERIAL_VIA_EXPLICIT_REGS,
+ "Repeating a single explicitly serial instruction"},
+ {ExecutionMode::SERIAL_VIA_MEMORY_INSTR |
+ ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR,
+ "Repeating two instructions"},
+};
-llvm::Expected<std::vector<CodeTemplate>>
-generateTwoInstructionPrototypes(const LLVMState &State,
- const Instruction &Instr) {
+static constexpr size_t kMaxAliasingInstructions = 10;
+
+static std::vector<Instruction>
+computeAliasingInstructions(const LLVMState &State, const Instruction &Instr,
+ size_t MaxAliasingInstructions) {
+ // Randomly iterate the set of instructions.
std::vector<unsigned> Opcodes;
Opcodes.resize(State.getInstrInfo().getNumOpcodes());
std::iota(Opcodes.begin(), Opcodes.end(), 0U);
std::shuffle(Opcodes.begin(), Opcodes.end(), randomGenerator());
+
+ std::vector<Instruction> AliasingInstructions;
for (const unsigned OtherOpcode : Opcodes) {
- if (OtherOpcode == Instr.Description->Opcode)
+ if (OtherOpcode == Instr.Description->getOpcode())
continue;
const Instruction OtherInstr(State, OtherOpcode);
if (OtherInstr.hasMemoryOperands())
continue;
- const AliasingConfigurations Forward(Instr, OtherInstr);
- const AliasingConfigurations Back(OtherInstr, Instr);
- if (Forward.empty() || Back.empty())
- continue;
- InstructionTemplate ThisIT(Instr);
- InstructionTemplate OtherIT(OtherInstr);
- if (!Forward.hasImplicitAliasing())
- setRandomAliasing(Forward, ThisIT, OtherIT);
- if (!Back.hasImplicitAliasing())
- setRandomAliasing(Back, OtherIT, ThisIT);
+ if (Instr.hasAliasingRegistersThrough(OtherInstr))
+ AliasingInstructions.push_back(std::move(OtherInstr));
+ if (AliasingInstructions.size() >= MaxAliasingInstructions)
+ break;
+ }
+ return AliasingInstructions;
+}
+
+static ExecutionMode getExecutionModes(const Instruction &Instr) {
+ ExecutionMode EM;
+ if (Instr.hasAliasingImplicitRegisters())
+ EM |= ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS;
+ if (Instr.hasTiedRegisters())
+ EM |= ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS;
+ if (Instr.hasMemoryOperands())
+ EM |= ExecutionMode::SERIAL_VIA_MEMORY_INSTR;
+ else {
+ if (Instr.hasAliasingRegisters())
+ EM |= ExecutionMode::SERIAL_VIA_EXPLICIT_REGS;
+ if (Instr.hasOneUseOrOneDef())
+ EM |= ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR;
+ }
+ return EM;
+}
+
+static void appendCodeTemplates(const LLVMState &State,
+ const Instruction &Instr,
+ ExecutionMode ExecutionModeBit,
+ llvm::StringRef ExecutionClassDescription,
+ std::vector<CodeTemplate> &CodeTemplates) {
+ assert(isEnumValue(ExecutionModeBit) && "Bit must be a power of two");
+ switch (ExecutionModeBit) {
+ case ExecutionMode::ALWAYS_SERIAL_IMPLICIT_REGS_ALIAS:
+ // Nothing to do, the instruction is always serial.
+ LLVM_FALLTHROUGH;
+ case ExecutionMode::ALWAYS_SERIAL_TIED_REGS_ALIAS: {
+ // Picking whatever value for the tied variable will make the instruction
+ // serial.
CodeTemplate CT;
- CT.Info = llvm::formatv("creating cycle through {0}.",
- State.getInstrInfo().getName(OtherOpcode));
- CT.Instructions.push_back(std::move(ThisIT));
- CT.Instructions.push_back(std::move(OtherIT));
- return getSingleton(CT);
+ CT.Execution = ExecutionModeBit;
+ CT.Info = ExecutionClassDescription;
+ CT.Instructions.push_back(Instr);
+ CodeTemplates.push_back(std::move(CT));
+ return;
+ }
+ case ExecutionMode::SERIAL_VIA_MEMORY_INSTR: {
+ // Select back-to-back memory instruction.
+ // TODO: Implement me.
+ return;
+ }
+ case ExecutionMode::SERIAL_VIA_EXPLICIT_REGS: {
+ // Making the execution of this instruction serial by selecting one def
+ // register to alias with one use register.
+ const AliasingConfigurations SelfAliasing(Instr, Instr);
+ assert(!SelfAliasing.empty() && !SelfAliasing.hasImplicitAliasing() &&
+ "Instr must alias itself explicitly");
+ InstructionTemplate IT(Instr);
+ // This is a self aliasing instruction so defs and uses are from the same
+ // instance, hence twice IT in the following call.
+ setRandomAliasing(SelfAliasing, IT, IT);
+ CodeTemplate CT;
+ CT.Execution = ExecutionModeBit;
+ CT.Info = ExecutionClassDescription;
+ CT.Instructions.push_back(std::move(IT));
+ CodeTemplates.push_back(std::move(CT));
+ return;
+ }
+ case ExecutionMode::SERIAL_VIA_NON_MEMORY_INSTR: {
+ // Select back-to-back non-memory instruction.
+ for (const auto OtherInstr :
+ computeAliasingInstructions(State, Instr, kMaxAliasingInstructions)) {
+ const AliasingConfigurations Forward(Instr, OtherInstr);
+ const AliasingConfigurations Back(OtherInstr, Instr);
+ InstructionTemplate ThisIT(Instr);
+ InstructionTemplate OtherIT(OtherInstr);
+ if (!Forward.hasImplicitAliasing())
+ setRandomAliasing(Forward, ThisIT, OtherIT);
+ if (!Back.hasImplicitAliasing())
+ setRandomAliasing(Back, OtherIT, ThisIT);
+ CodeTemplate CT;
+ CT.Execution = ExecutionModeBit;
+ CT.Info = ExecutionClassDescription;
+ CT.Instructions.push_back(std::move(ThisIT));
+ CT.Instructions.push_back(std::move(OtherIT));
+ CodeTemplates.push_back(std::move(CT));
+ }
+ return;
+ }
+ default:
+ llvm_unreachable("Unhandled enum value");
}
- return llvm::make_error<BenchmarkFailure>(
- "Infeasible : Didn't find any scheme to make the instruction serial");
}
+LatencySnippetGenerator::~LatencySnippetGenerator() = default;
+
llvm::Expected<std::vector<CodeTemplate>>
LatencySnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
- if (Instr.hasMemoryOperands())
+ std::vector<CodeTemplate> Results;
+ const ExecutionMode EM = getExecutionModes(Instr);
+ for (const auto EC : kExecutionClasses) {
+ for (const auto ExecutionModeBit : getExecutionModeBits(EM & EC.Mask))
+ appendCodeTemplates(State, Instr, ExecutionModeBit, EC.Description,
+ Results);
+ if (!Results.empty())
+ break;
+ }
+ if (Results.empty())
return llvm::make_error<BenchmarkFailure>(
- "Infeasible : has memory operands");
- return llvm::handleExpected( //
- generateSelfAliasingCodeTemplates(Instr),
- [this, &Instr]() {
- return generateTwoInstructionPrototypes(State, Instr);
- },
- [](const BenchmarkFailure &) { /*Consume Error*/ });
+ "No strategy found to make the execution serial");
+ return std::move(Results);
}
const char *LatencyBenchmarkRunner::getCounterName() const {
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
index fa9378856f4..59f56520efc 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.cpp
@@ -27,7 +27,14 @@ unsigned Variable::getPrimaryOperandIndex() const {
return TiedOperands[0];
}
-bool Variable::hasTiedOperands() const { return TiedOperands.size() > 1; }
+bool Variable::hasTiedOperands() const {
+ assert(TiedOperands.size() <= 2 &&
+ "No more than two operands can be tied together");
+ // By definition only Use and Def operands can be tied together.
+ // TiedOperands[0] is the Def operand (LLVM stores defs first).
+ // TiedOperands[1] is the Use operand.
+ return TiedOperands.size() > 1;
+}
unsigned Operand::getIndex() const {
assert(Index >= 0 && "Index must be set");
@@ -197,6 +204,10 @@ bool Instruction::hasAliasingRegisters() const {
return AllDefRegs.anyCommon(AllUseRegs);
}
+bool Instruction::hasOneUseOrOneDef() const {
+ return AllDefRegs.count() || AllUseRegs.count();
+}
+
void Instruction::dump(const llvm::MCRegisterInfo &RegInfo,
llvm::raw_ostream &Stream) const {
Stream << "- " << Name << "\n";
@@ -288,8 +299,7 @@ bool AliasingConfigurations::hasImplicitAliasing() const {
}
AliasingConfigurations::AliasingConfigurations(
- const Instruction &DefInstruction, const Instruction &UseInstruction)
- : DefInstruction(DefInstruction), UseInstruction(UseInstruction) {
+ const Instruction &DefInstruction, const Instruction &UseInstruction) {
if (UseInstruction.AllUseRegs.anyCommon(DefInstruction.AllDefRegs)) {
auto CommonRegisters = UseInstruction.AllUseRegs;
CommonRegisters &= DefInstruction.AllDefRegs;
diff --git a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
index 6910538a31f..17f3e2b930d 100644
--- a/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
+++ b/llvm/tools/llvm-exegesis/lib/MCInstrDescView.h
@@ -125,6 +125,11 @@ struct Instruction {
// reads or write the same memory region.
bool hasMemoryOperands() const;
+ // Returns whether this instruction as at least one use or one def.
+ // Repeating this instruction may execute sequentially by adding an
+ // instruction that aliases one of these.
+ bool hasOneUseOrOneDef() const;
+
// Convenient function to help with debugging.
void dump(const llvm::MCRegisterInfo &RegInfo,
llvm::raw_ostream &Stream) const;
@@ -174,10 +179,7 @@ struct AliasingConfigurations {
bool empty() const; // True if no aliasing configuration is found.
bool hasImplicitAliasing() const;
- void setExplicitAliasing() const;
- const Instruction &DefInstruction;
- const Instruction &UseInstruction;
llvm::SmallVector<AliasingRegisterOperands, 32> Configurations;
};
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
index feee61d113c..cdf54a32e4f 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.cpp
@@ -22,7 +22,7 @@
namespace exegesis {
-std::vector<CodeTemplate> getSingleton(CodeTemplate &CT) {
+std::vector<CodeTemplate> getSingleton(CodeTemplate &&CT) {
std::vector<CodeTemplate> Result;
Result.push_back(std::move(CT));
return Result;
diff --git a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
index e48cf0cfeb0..4b307fd75ac 100644
--- a/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
+++ b/llvm/tools/llvm-exegesis/lib/SnippetGenerator.h
@@ -30,7 +30,7 @@
namespace exegesis {
-std::vector<CodeTemplate> getSingleton(CodeTemplate &CT);
+std::vector<CodeTemplate> getSingleton(CodeTemplate &&CT);
// Generates code templates that has a self-dependency.
llvm::Expected<std::vector<CodeTemplate>>
diff --git a/llvm/tools/llvm-exegesis/lib/Uops.cpp b/llvm/tools/llvm-exegesis/lib/Uops.cpp
index a3ada77ef8c..d8065adbdb2 100644
--- a/llvm/tools/llvm-exegesis/lib/Uops.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Uops.cpp
@@ -153,13 +153,13 @@ UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
CT.Info = "instruction is parallel, repeating a random one.";
CT.Instructions.push_back(std::move(IT));
instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
- return getSingleton(CT);
+ return getSingleton(std::move(CT));
}
if (SelfAliasing.hasImplicitAliasing()) {
CT.Info = "instruction is serial, repeating a random one.";
CT.Instructions.push_back(std::move(IT));
instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
- return getSingleton(CT);
+ return getSingleton(std::move(CT));
}
const auto TiedVariables = getVariablesWithTiedOperands(Instr);
if (!TiedVariables.empty()) {
@@ -181,7 +181,7 @@ UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
CT.Instructions.push_back(std::move(TmpIT));
}
instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
- return getSingleton(CT);
+ return getSingleton(std::move(CT));
}
const auto &ReservedRegisters = State.getRATC().reservedRegisters();
// No tied variables, we pick random values for defs.
@@ -218,7 +218,7 @@ UopsSnippetGenerator::generateCodeTemplates(const Instruction &Instr) const {
"instruction has no tied variables picking Uses different from defs";
CT.Instructions.push_back(std::move(IT));
instantiateMemoryOperands(CT.ScratchSpacePointerInReg, CT.Instructions);
- return getSingleton(CT);
+ return getSingleton(std::move(CT));
}
std::vector<BenchmarkMeasure>
OpenPOWER on IntegriCloud