summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SICodeEmitter.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SICodeEmitter.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SICodeEmitter.cpp321
1 files changed, 321 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/SICodeEmitter.cpp b/llvm/lib/Target/AMDGPU/SICodeEmitter.cpp
new file mode 100644
index 00000000000..f169287f038
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SICodeEmitter.cpp
@@ -0,0 +1,321 @@
+//===-- SICodeEmitter.cpp - SI Code Emitter -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The SI code emitter produces machine code that can be executed directly on
+// the GPU device.
+//
+//===----------------------------------------------------------------------===//
+
+
+#include "AMDGPU.h"
+#include "AMDGPUUtil.h"
+#include "AMDILCodeEmitter.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <stdio.h>
+
+#define LITERAL_REG 255
+#define VGPR_BIT(src_idx) (1ULL << (9 * src_idx - 1))
+using namespace llvm;
+
+namespace {
+
+ class SICodeEmitter : public MachineFunctionPass, public AMDILCodeEmitter {
+
+ private:
+ static char ID;
+ formatted_raw_ostream &_OS;
+ const TargetMachine *TM;
+ void emitState(MachineFunction & MF);
+ void emitInstr(MachineInstr &MI);
+
+ void outputBytes(uint64_t value, unsigned bytes);
+ unsigned GPRAlign(const MachineInstr &MI, unsigned OpNo, unsigned shift)
+ const;
+
+ public:
+ SICodeEmitter(formatted_raw_ostream &OS) : MachineFunctionPass(ID),
+ _OS(OS), TM(NULL) { }
+ const char *getPassName() const { return "SI Code Emitter"; }
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ /// getMachineOpValue - Return the encoding for MO
+ virtual uint64_t getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ /// GPR4AlignEncode - Encoding for when 4 consectuive registers are used
+ virtual unsigned GPR4AlignEncode(const MachineInstr &MI, unsigned OpNo)
+ const;
+
+ /// GPR2AlignEncode - Encoding for when 2 consecutive registers are used
+ virtual unsigned GPR2AlignEncode(const MachineInstr &MI, unsigned OpNo)
+ const;
+ /// i32LiteralEncode - Encode an i32 literal this is used as an operand
+ /// for an instruction in place of a register.
+ virtual uint64_t i32LiteralEncode(const MachineInstr &MI, unsigned OpNo)
+ const;
+ /// SMRDmemriEncode - Encoding for SMRD indexed loads
+ virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
+ const;
+
+ /// VOPPostEncode - Post-Encoder method for VOP instructions
+ virtual uint64_t VOPPostEncode(const MachineInstr &MI,
+ uint64_t Value) const;
+ };
+}
+
+char SICodeEmitter::ID = 0;
+
+FunctionPass *llvm::createSICodeEmitterPass(formatted_raw_ostream &OS) {
+ return new SICodeEmitter(OS);
+}
+
+void SICodeEmitter::emitState(MachineFunction & MF)
+{
+ unsigned maxSGPR = 0;
+ unsigned maxVGPR = 0;
+ bool VCCUsed = false;
+ const SIRegisterInfo * RI =
+ static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
+ SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ unsigned numOperands = MI.getNumOperands();
+ for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
+ MachineOperand & MO = MI.getOperand(op_idx);
+ unsigned maxUsed;
+ unsigned width = 0;
+ bool isSGPR = false;
+ unsigned reg;
+ unsigned hwReg;
+ if (!MO.isReg()) {
+ continue;
+ }
+ reg = MO.getReg();
+ if (reg == AMDGPU::VCC) {
+ VCCUsed = true;
+ continue;
+ }
+ if (AMDGPU::SReg_32RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 1;
+ } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 1;
+ } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 2;
+ } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 2;
+ } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 4;
+ } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
+ isSGPR = false;
+ width = 4;
+ } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
+ isSGPR = true;
+ width = 8;
+ } else {
+ assert("!Unknown register class");
+ }
+ hwReg = RI->getEncodingValue(reg);
+ maxUsed = ((hwReg + 1) * width) - 1;
+ if (isSGPR) {
+ maxSGPR = maxUsed > maxSGPR ? maxUsed : maxSGPR;
+ } else {
+ maxVGPR = maxUsed > maxVGPR ? maxUsed : maxVGPR;
+ }
+ }
+ }
+ }
+ if (VCCUsed) {
+ maxSGPR += 2;
+ }
+ outputBytes(maxSGPR + 1, 4);
+ outputBytes(maxVGPR + 1, 4);
+ outputBytes(MFI->spi_ps_input_addr, 4);
+}
+
+bool SICodeEmitter::runOnMachineFunction(MachineFunction &MF)
+{
+ TM = &MF.getTarget();
+ const AMDILSubtarget &STM = TM->getSubtarget<AMDILSubtarget>();
+
+ if (STM.dumpCode()) {
+ MF.dump();
+ }
+
+ emitState(MF);
+
+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
+ BB != BB_E; ++BB) {
+ MachineBasicBlock &MBB = *BB;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ if (MI.getOpcode() != AMDGPU::KILL && MI.getOpcode() != AMDGPU::RETURN) {
+ emitInstr(MI);
+ }
+ }
+ }
+ // Emit S_END_PGM
+ MachineInstr * End = BuildMI(MF, DebugLoc(),
+ TM->getInstrInfo()->get(AMDGPU::S_ENDPGM));
+ emitInstr(*End);
+ return false;
+}
+
+void SICodeEmitter::emitInstr(MachineInstr &MI)
+{
+ const SIInstrInfo * SII = static_cast<const SIInstrInfo*>(TM->getInstrInfo());
+
+ uint64_t hwInst = getBinaryCodeForInstr(MI);
+
+ if ((hwInst & 0xffffffff) == 0xffffffff) {
+ fprintf(stderr, "Unsupported Instruction: \n");
+ MI.dump();
+ abort();
+ }
+
+ unsigned bytes = SII->getEncodingBytes(MI);
+ outputBytes(hwInst, bytes);
+}
+
+uint64_t SICodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const
+{
+ const SIRegisterInfo * RI =
+ static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
+
+ switch(MO.getType()) {
+ case MachineOperand::MO_Register:
+ return RI->getEncodingValue(MO.getReg());
+
+ case MachineOperand::MO_Immediate:
+ return MO.getImm();
+
+ case MachineOperand::MO_FPImmediate:
+ // XXX: Not all instructions can use inline literals
+ // XXX: We should make sure this is a 32-bit constant
+ return LITERAL_REG | (MO.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue() << 32);
+ default:
+ llvm_unreachable("Encoding of this operand type is not supported yet.");
+ break;
+ }
+}
+
+unsigned SICodeEmitter::GPRAlign(const MachineInstr &MI, unsigned OpNo,
+ unsigned shift) const
+{
+ const SIRegisterInfo * RI =
+ static_cast<const SIRegisterInfo*>(TM->getRegisterInfo());
+ unsigned regCode = RI->getEncodingValue(MI.getOperand(OpNo).getReg());
+ return regCode >> shift;
+}
+
+unsigned SICodeEmitter::GPR4AlignEncode(const MachineInstr &MI,
+ unsigned OpNo) const
+{
+ return GPRAlign(MI, OpNo, 2);
+}
+
+unsigned SICodeEmitter::GPR2AlignEncode(const MachineInstr &MI,
+ unsigned OpNo) const
+{
+ return GPRAlign(MI, OpNo, 1);
+}
+
+uint64_t SICodeEmitter::i32LiteralEncode(const MachineInstr &MI,
+ unsigned OpNo) const
+{
+ return LITERAL_REG | (MI.getOperand(OpNo).getImm() << 32);
+}
+
+#define SMRD_OFFSET_MASK 0xff
+#define SMRD_IMM_SHIFT 8
+#define SMRD_SBASE_MASK 0x3f
+#define SMRD_SBASE_SHIFT 9
+/// SMRDmemriEncode - This function is responsibe for encoding the offset
+/// and the base ptr for SMRD instructions it should return a bit string in
+/// this format:
+///
+/// OFFSET = bits{7-0}
+/// IMM = bits{8}
+/// SBASE = bits{14-9}
+///
+uint32_t SICodeEmitter::SMRDmemriEncode(const MachineInstr &MI,
+ unsigned OpNo) const
+{
+ uint32_t encoding;
+
+ const MachineOperand &OffsetOp = MI.getOperand(OpNo + 1);
+
+ //XXX: Use this function for SMRD loads with register offsets
+ assert(OffsetOp.isImm());
+
+ encoding =
+ (getMachineOpValue(MI, OffsetOp) & SMRD_OFFSET_MASK)
+ | (1 << SMRD_IMM_SHIFT) //XXX If the Offset is a register we shouldn't set this bit
+ | ((GPR2AlignEncode(MI, OpNo) & SMRD_SBASE_MASK) << SMRD_SBASE_SHIFT)
+ ;
+
+ return encoding;
+}
+
+/// Set the "VGPR" bit for VOP args that can take either a VGPR or a SGPR.
+/// XXX: It would be nice if we could handle this without a PostEncode function.
+uint64_t SICodeEmitter::VOPPostEncode(const MachineInstr &MI,
+ uint64_t Value) const
+{
+ const SIInstrInfo * SII = static_cast<const SIInstrInfo*>(TM->getInstrInfo());
+ unsigned encodingType = SII->getEncodingType(MI);
+ unsigned numSrcOps;
+ unsigned vgprBitOffset;
+
+ if (encodingType == SIInstrEncodingType::VOP3) {
+ numSrcOps = 3;
+ vgprBitOffset = 32;
+ } else {
+ numSrcOps = 1;
+ vgprBitOffset = 0;
+ }
+
+ // Add one to skip over the destination reg operand.
+ for (unsigned opIdx = 1; opIdx < numSrcOps + 1; opIdx++) {
+ if (!MI.getOperand(opIdx).isReg()) {
+ continue;
+ }
+ unsigned reg = MI.getOperand(opIdx).getReg();
+ if (AMDGPU::VReg_32RegClass.contains(reg)
+ || AMDGPU::VReg_64RegClass.contains(reg)) {
+ Value |= (VGPR_BIT(opIdx)) << vgprBitOffset;
+ }
+ }
+ return Value;
+}
+
+
+void SICodeEmitter::outputBytes(uint64_t value, unsigned bytes)
+{
+ for (unsigned i = 0; i < bytes; i++) {
+ _OS.write((uint8_t) ((value >> (8 * i)) & 0xff));
+ }
+}
OpenPOWER on IntegriCloud