//===-- Target.cpp ----------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "../Target.h" #include "../Latency.h" #include "../Uops.h" #include "MCTargetDesc/X86BaseInfo.h" #include "MCTargetDesc/X86MCTargetDesc.h" #include "X86.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "llvm/MC/MCInstBuilder.h" namespace exegesis { namespace { // Common code for X86 Uops and Latency runners. template class X86BenchmarkRunner : public Impl { using Impl::Impl; llvm::Expected generatePrototype(unsigned Opcode) const override { // Test whether we can generate a snippet for this instruction. const auto &InstrInfo = this->State.getInstrInfo(); const auto OpcodeName = InstrInfo.getName(Opcode); if (OpcodeName.startswith("POPF") || OpcodeName.startswith("PUSHF") || OpcodeName.startswith("ADJCALLSTACK")) { return llvm::make_error( "Unsupported opcode: Push/Pop/AdjCallStack"); } // Handle X87. const auto &InstrDesc = InstrInfo.get(Opcode); const unsigned FPInstClass = InstrDesc.TSFlags & llvm::X86II::FPTypeMask; const Instruction Instr(InstrDesc, this->RATC); switch (FPInstClass) { case llvm::X86II::NotFP: break; case llvm::X86II::ZeroArgFP: return llvm::make_error("Unsupported x87 ZeroArgFP"); case llvm::X86II::OneArgFP: return llvm::make_error("Unsupported x87 OneArgFP"); case llvm::X86II::OneArgFPRW: case llvm::X86II::TwoArgFP: { // These are instructions like // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW) // - `ST(0) = ST(0) + ST(i)` (TwoArgFP) // They are intrinsically serial and do not modify the state of the stack. // We generate the same code for latency and uops. return this->generateSelfAliasingPrototype(Instr); } case llvm::X86II::CompareFP: return Impl::handleCompareFP(Instr); case llvm::X86II::CondMovFP: return Impl::handleCondMovFP(Instr); case llvm::X86II::SpecialFP: return llvm::make_error("Unsupported x87 SpecialFP"); default: llvm_unreachable("Unknown FP Type!"); } // Fallback to generic implementation. return Impl::Base::generatePrototype(Opcode); } }; class X86LatencyImpl : public LatencyBenchmarkRunner { protected: using Base = LatencyBenchmarkRunner; using Base::Base; llvm::Expected handleCompareFP(const Instruction &Instr) const { return llvm::make_error("Unsupported x87 CompareFP"); } llvm::Expected handleCondMovFP(const Instruction &Instr) const { return llvm::make_error("Unsupported x87 CondMovFP"); } }; class X86UopsImpl : public UopsBenchmarkRunner { protected: using Base = UopsBenchmarkRunner; using Base::Base; // We can compute uops for any FP instruction that does not grow or shrink the // stack (either do not touch the stack or push as much as they pop). llvm::Expected handleCompareFP(const Instruction &Instr) const { return generateUnconstrainedPrototype( Instr, "instruction does not grow/shrink the FP stack"); } llvm::Expected handleCondMovFP(const Instruction &Instr) const { return generateUnconstrainedPrototype( Instr, "instruction does not grow/shrink the FP stack"); } }; class ExegesisX86Target : public ExegesisTarget { void addTargetSpecificPasses(llvm::PassManagerBase &PM) const override { // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F. PM.add(llvm::createX86FloatingPointStackifierPass()); } std::vector setRegToConstant(const llvm::MCSubtargetInfo &STI, unsigned Reg) const override { // GPR. if (llvm::X86::GR8RegClass.contains(Reg)) return {llvm::MCInstBuilder(llvm::X86::MOV8ri).addReg(Reg).addImm(1)}; if (llvm::X86::GR16RegClass.contains(Reg)) return {llvm::MCInstBuilder(llvm::X86::MOV16ri).addReg(Reg).addImm(1)}; if (llvm::X86::GR32RegClass.contains(Reg)) return {llvm::MCInstBuilder(llvm::X86::MOV32ri).addReg(Reg).addImm(1)}; if (llvm::X86::GR64RegClass.contains(Reg)) return {llvm::MCInstBuilder(llvm::X86::MOV64ri32).addReg(Reg).addImm(1)}; // MMX. if (llvm::X86::VR64RegClass.contains(Reg)) return setVectorRegToConstant(Reg, 8, llvm::X86::MMX_MOVQ64rm); // {X,Y,Z}MM. if (llvm::X86::VR128XRegClass.contains(Reg)) { if (STI.getFeatureBits()[llvm::X86::FeatureAVX512]) return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQU32Z128rm); if (STI.getFeatureBits()[llvm::X86::FeatureAVX]) return setVectorRegToConstant(Reg, 16, llvm::X86::VMOVDQUrm); return setVectorRegToConstant(Reg, 16, llvm::X86::MOVDQUrm); } if (llvm::X86::VR256XRegClass.contains(Reg)) { if (STI.getFeatureBits()[llvm::X86::FeatureAVX512]) return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQU32Z256rm); return setVectorRegToConstant(Reg, 32, llvm::X86::VMOVDQUYrm); } if (llvm::X86::VR512RegClass.contains(Reg)) return setVectorRegToConstant(Reg, 64, llvm::X86::VMOVDQU32Zrm); // X87. if (llvm::X86::RFP32RegClass.contains(Reg) || llvm::X86::RFP64RegClass.contains(Reg) || llvm::X86::RFP80RegClass.contains(Reg)) return setVectorRegToConstant(Reg, 8, llvm::X86::LD_Fp64m); if (Reg == llvm::X86::EFLAGS) { // Set all flags to 0 but the bits that are "reserved and set to 1". constexpr const uint32_t kImmValue = 0x00007002u; std::vector Result; Result.push_back(allocateStackSpace(8)); Result.push_back(fillStackSpace(llvm::X86::MOV64mi32, 0, kImmValue)); Result.push_back(llvm::MCInstBuilder(llvm::X86::POPF64)); // Also pops. return Result; } return {}; } std::unique_ptr createLatencyBenchmarkRunner(const LLVMState &State) const override { return llvm::make_unique>(State); } std::unique_ptr createUopsBenchmarkRunner(const LLVMState &State) const override { return llvm::make_unique>(State); } bool matchesArch(llvm::Triple::ArchType Arch) const override { return Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86; } private: // setRegToConstant() specialized for a vector register of size // `RegSizeBytes`. `RMOpcode` is the opcode used to do a memory -> vector // register load. static std::vector setVectorRegToConstant(const unsigned Reg, const unsigned RegSizeBytes, const unsigned RMOpcode) { // There is no instruction to directly set XMM, go through memory. // Since vector values can be interpreted as integers of various sizes (8 // to 64 bits) as well as floats and double, so we chose an immediate // value that has set bits for all byte values and is a normal float/ // double. 0x40404040 is ~32.5 when interpreted as a double and ~3.0f when // interpreted as a float. constexpr const uint32_t kImmValue = 0x40404040u; std::vector Result; Result.push_back(allocateStackSpace(RegSizeBytes)); constexpr const unsigned kMov32NumBytes = 4; for (unsigned Disp = 0; Disp < RegSizeBytes; Disp += kMov32NumBytes) { Result.push_back(fillStackSpace(llvm::X86::MOV32mi, Disp, kImmValue)); } Result.push_back(loadToReg(Reg, RMOpcode)); Result.push_back(releaseStackSpace(RegSizeBytes)); return Result; } // Allocates scratch memory on the stack. static llvm::MCInst allocateStackSpace(unsigned Bytes) { return llvm::MCInstBuilder(llvm::X86::SUB64ri8) .addReg(llvm::X86::RSP) .addReg(llvm::X86::RSP) .addImm(Bytes); } // Fills scratch memory at offset `OffsetBytes` with value `Imm`. static llvm::MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes, uint64_t Imm) { return llvm::MCInstBuilder(MovOpcode) // Address = ESP .addReg(llvm::X86::RSP) // BaseReg .addImm(1) // ScaleAmt .addReg(0) // IndexReg .addImm(OffsetBytes) // Disp .addReg(0) // Segment // Immediate. .addImm(Imm); } // Loads scratch memory into register `Reg` using opcode `RMOpcode`. static llvm::MCInst loadToReg(unsigned Reg, unsigned RMOpcode) { return llvm::MCInstBuilder(RMOpcode) .addReg(Reg) // Address = ESP .addReg(llvm::X86::RSP) // BaseReg .addImm(1) // ScaleAmt .addReg(0) // IndexReg .addImm(0) // Disp .addReg(0); // Segment } // Releases scratch memory. static llvm::MCInst releaseStackSpace(unsigned Bytes) { return llvm::MCInstBuilder(llvm::X86::ADD64ri8) .addReg(llvm::X86::RSP) .addReg(llvm::X86::RSP) .addImm(Bytes); } }; } // namespace static ExegesisTarget *getTheExegesisX86Target() { static ExegesisX86Target Target; return &Target; } void InitializeX86ExegesisTarget() { ExegesisTarget::registerTarget(getTheExegesisX86Target()); } } // namespace exegesis