Diffstat (limited to 'llvm/lib/Target/AMDGPU/SICodeEmitter.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SICodeEmitter.cpp | 321
1 file changed, 321 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SICodeEmitter.cpp b/llvm/lib/Target/AMDGPU/SICodeEmitter.cpp
new file mode 100644
index 00000000000..f169287f038
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SICodeEmitter.cpp
@@ -0,0 +1,321 @@
+//===-- SICodeEmitter.cpp - SI Code Emitter -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The SI code emitter produces machine code that can be executed directly on
+// the GPU device.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AMDGPU.h"
+#include "AMDGPUUtil.h"
+#include "AMDILCodeEmitter.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <stdio.h>
+
+#define LITERAL_REG 255
+#define VGPR_BIT(src_idx) (1ULL << (9 * (src_idx) - 1))
+using namespace llvm;
+
+namespace {
+
+  class SICodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter {
+
+  private:
+    static char ID;
+    formatted_raw_ostream &_OS;
+    const TargetMachine *TM;
+    void emitState(MachineFunction & MF);
+    void emitInstr(MachineInstr &MI);
+
+    void outputBytes(uint64_t value, unsigned bytes);
+    unsigned GPRAlign(const MachineInstr &MI, unsigned OpNo, unsigned shift)
+                      const;
+
+  public:
+    SICodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
+        _OS(OS), TM(NULL) { }
+    const char *getPassName() const { return "SI Code Emitter"; }
+    bool runOnMachineFunction(MachineFunction &MF);
+
+    /// getMachineOpValue - Return the encoding for MO
+    virtual uint64_t getMachineOpValue(const MachineInstr &MI,
+                                       const MachineOperand &MO) const;
+
+    /// GPR4AlignEncode - Encoding for when 4 consecutive registers are used
+    virtual unsigned GPR4AlignEncode(const MachineInstr &MI, unsigned OpNo)
+                                     const;
+
+    /// GPR2AlignEncode - Encoding for when 2 consecutive registers are used
+    virtual unsigned GPR2AlignEncode(const MachineInstr &MI, unsigned OpNo)
+                                     const;
+    /// i32LiteralEncode - Encode an i32 literal that is used as an operand
+    /// for an instruction in place of a register.
+    virtual uint64_t i32LiteralEncode(const MachineInstr &MI, unsigned OpNo)
+                                      const;
+    /// SMRDmemriEncode - Encoding for SMRD indexed loads
+    virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
+                                     const;
+
+    /// VOPPostEncode - Post-Encoder method for VOP instructions
+    virtual uint64_t VOPPostEncode(const MachineInstr &MI,
+                                   uint64_t Value) const;
+  };
+}
+
+char SICodeEmitter::ID = 0;
+
+FunctionPass *llvm::createSICodeEmitterPass(formatted_raw_ostream &OS) {
+  return new SICodeEmitter(OS);
+}
+
+void SICodeEmitter::emitState(MachineFunction & MF)
+{
+  unsigned maxSGPR = 0;
+  unsigned maxVGPR = 0;
+  bool VCCUsed = false;
+  const SIRegisterInfo * RI =
+      static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
+  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+       BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+         I != E; ++I) {
+      MachineInstr &MI = *I;
+      unsigned numOperands = MI.getNumOperands();
+      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+        MachineOperand & MO = MI.getOperand(op_idx);
+        unsigned maxUsed;
+        unsigned width = 0;
+        bool isSGPR = false;
+        unsigned reg;
+        unsigned hwReg;
+        if (!MO.isReg()) {
+          continue;
+        }
+        reg = MO.getReg();
+        if (reg == AMDGPU::VCC) {
+          VCCUsed = true;
+          continue;
+        }
+        if (AMDGPU::SReg_32RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 1;
+        } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 1;
+        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 2;
+        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 2;
+        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 4;
+        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
+          isSGPR = false;
+          width = 4;
+        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
+          isSGPR = true;
+          width = 8;
+        } else {
+          assert(!"Unknown register class");
+        }
+        hwReg = RI->getEncodingValue(reg);
+        maxUsed = ((hwReg + 1) * width) - 1;
+        if (isSGPR) {
+          maxSGPR = maxUsed > maxSGPR ? maxUsed : maxSGPR;
+        } else {
+          maxVGPR = maxUsed > maxVGPR ? maxUsed : maxVGPR;
+        }
+      }
+    }
+  }
+  if (VCCUsed) {
+    maxSGPR += 2;
+  }
+  outputBytes(maxSGPR + 1, 4);
+  outputBytes(maxVGPR + 1, 4);
+  outputBytes(MFI->spi_ps_input_addr, 4);
+}
+
+bool SICodeEmitter::runOnMachineFunction(MachineFunction &MF)
+{
+  TM = &MF.getTarget();
+  const AMDILSubtarget &STM = TM->getSubtarget<AMDILSubtarget>();
+
+  if (STM.dumpCode()) {
+    MF.dump();
+  }
+
+  emitState(MF);
+
+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+       BB != BB_E; ++BB) {
+    MachineBasicBlock &MBB = *BB;
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+         I != E; ++I) {
+      MachineInstr &MI = *I;
+      if (MI.getOpcode() != AMDGPU::KILL && MI.getOpcode() != AMDGPU::RETURN) {
+        emitInstr(MI);
+      }
+    }
+  }
+  // Emit S_END_PGM
+  MachineInstr * End = BuildMI(MF, DebugLoc(),
+                               TM->getInstrInfo()->get(AMDGPU::S_ENDPGM));
+  emitInstr(*End);
+  return false;
+}
+
+void SICodeEmitter::emitInstr(MachineInstr &MI)
+{
+  const SIInstrInfo * SII = static_cast<const SIInstrInfo*>(TM->getInstrInfo());
+
+  uint64_t hwInst = getBinaryCodeForInstr(MI);
+
+  if ((hwInst & 0xffffffff) == 0xffffffff) {
+    fprintf(stderr, "Unsupported Instruction:\n");
+    MI.dump();
+    abort();
+  }
+
+  unsigned bytes = SII->getEncodingBytes(MI);
+  outputBytes(hwInst, bytes);
+}
+
+uint64_t SICodeEmitter::getMachineOpValue(const MachineInstr &MI,
+                                          const MachineOperand &MO) const
+{
+  const SIRegisterInfo * RI =
+      static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
+
+  switch(MO.getType()) {
+  case MachineOperand::MO_Register:
+    return RI->getEncodingValue(MO.getReg());
+
+  case MachineOperand::MO_Immediate:
+    return MO.getImm();
+
+  case MachineOperand::MO_FPImmediate:
+    // XXX: Not all instructions can use inline literals
+    // XXX: We should make sure this is a 32-bit constant
+    return LITERAL_REG |
+           (MO.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue() << 32);
+  default:
+    llvm_unreachable("Encoding of this operand type is not supported yet.");
+    break;
+  }
+}
+
+unsigned SICodeEmitter::GPRAlign(const MachineInstr &MI, unsigned OpNo,
+                                 unsigned shift) const
+{
+  const SIRegisterInfo * RI =
+      static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
+  unsigned regCode = RI->getEncodingValue(MI.getOperand(OpNo).getReg());
+  return regCode >> shift;
+}
+
+unsigned SICodeEmitter::GPR4AlignEncode(const MachineInstr &MI,
+                                        unsigned OpNo) const
+{
+  return GPRAlign(MI, OpNo, 2);
+}
+
+unsigned SICodeEmitter::GPR2AlignEncode(const MachineInstr &MI,
+                                        unsigned OpNo) const
+{
+  return GPRAlign(MI, OpNo, 1);
+}
+
+uint64_t SICodeEmitter::i32LiteralEncode(const MachineInstr &MI,
+                                         unsigned OpNo) const
+{
+  return LITERAL_REG | (MI.getOperand(OpNo).getImm() << 32);
+}
+
+#define SMRD_OFFSET_MASK 0xff
+#define SMRD_IMM_SHIFT 8
+#define SMRD_SBASE_MASK 0x3f
+#define SMRD_SBASE_SHIFT 9
+/// SMRDmemriEncode - This function is responsible for encoding the offset
+/// and the base ptr for SMRD instructions.  It should return a bit string in
+/// this format:
+///
+/// OFFSET = bits{7-0}
+/// IMM    = bits{8}
+/// SBASE  = bits{14-9}
+///
+uint32_t SICodeEmitter::SMRDmemriEncode(const MachineInstr &MI,
+                                        unsigned OpNo) const
+{
+  uint32_t encoding;
+
+  const MachineOperand &OffsetOp = MI.getOperand(OpNo + 1);
+
+  //XXX: Use this function for SMRD loads with register offsets
+  assert(OffsetOp.isImm());
+
+  encoding =
+      (getMachineOpValue(MI, OffsetOp) & SMRD_OFFSET_MASK)
+    | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
+    | ((GPR2AlignEncode(MI, OpNo) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
+    ;
+
+  return encoding;
+}
+
+/// Set the "VGPR" bit for VOP args that can take either a VGPR or a SGPR.
+/// XXX: It would be nice if we could handle this without a PostEncode
+/// function.
+uint64_t SICodeEmitter::VOPPostEncode(const MachineInstr &MI,
+                                      uint64_t Value) const
+{
+  const SIInstrInfo * SII = static_cast<const SIInstrInfo*>(TM->getInstrInfo());
+  unsigned encodingType = SII->getEncodingType(MI);
+  unsigned numSrcOps;
+  unsigned vgprBitOffset;
+
+  if (encodingType == SIInstrEncodingType::VOP3) {
+    numSrcOps = 3;
+    vgprBitOffset = 32;
+  } else {
+    numSrcOps = 1;
+    vgprBitOffset = 0;
+  }
+
+  // Add one to skip over the destination reg operand.
+  for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
+    if (!MI.getOperand(opIdx).isReg()) {
+      continue;
+    }
+    unsigned reg = MI.getOperand(opIdx).getReg();
+    if (AMDGPU::VReg_32RegClass.contains(reg)
+        || AMDGPU::VReg_64RegClass.contains(reg)) {
+      Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
+    }
+  }
+  return Value;
+}
+
+
+void SICodeEmitter::outputBytes(uint64_t value, unsigned bytes)
+{
+  for (unsigned i = 0; i < bytes; i++) {
+    _OS.write((uint8_t) ((value >> (8 * i)) & 0xff));
+  }
+}
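
For reference, the two bit-level pieces of this commit that are easiest to get wrong are the SMRD memri field packing (OFFSET in bits 7-0, IMM in bit 8, SBASE in bits 14-9, per the SMRDmemriEncode comment) and the least-significant-byte-first emission in outputBytes. The standalone C++ sketch below illustrates both; it is not part of the commit, and the helper names packSMRDmemri and dumpBytes are hypothetical.

// Minimal sketch of the SMRD memri packing and little-endian byte output
// described in the diff above. Sample values are made up.
#include <cassert>
#include <cstdint>
#include <cstdio>

// OFFSET = bits{7-0}, IMM = bits{8}, SBASE = bits{14-9}. `sbase` here is
// the already-aligned encoding (what GPR2AlignEncode returns, i.e. the
// register code shifted right by 1).
static uint32_t packSMRDmemri(uint32_t offset, bool imm, uint32_t sbase) {
  assert(offset <= 0xff && sbase <= 0x3f);
  return (offset & 0xff) | ((imm ? 1u : 0u) << 8) | ((sbase & 0x3f) << 9);
}

// Mirrors outputBytes: emit `bytes` bytes of `value`, least significant first.
static void dumpBytes(uint64_t value, unsigned bytes) {
  for (unsigned i = 0; i < bytes; i++)
    printf("%02x ", (unsigned)((value >> (8 * i)) & 0xff));
  printf("\n");
}

int main() {
  // e.g. an immediate offset of 0x10 with an sbase encoding of 0x2:
  uint32_t enc = packSMRDmemri(0x10, true, 0x2);  // 0x10 | 0x100 | 0x400
  dumpBytes(enc, 4);  // prints "10 05 00 00" (0x510, little-endian)
  return 0;
}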