diff options
Diffstat (limited to 'llvm/lib/Bytecode/Writer')
-rw-r--r-- | llvm/lib/Bytecode/Writer/ConstantWriter.cpp | 154 | ||||
-rw-r--r-- | llvm/lib/Bytecode/Writer/InstructionWriter.cpp | 184 | ||||
-rw-r--r-- | llvm/lib/Bytecode/Writer/Makefile | 7 | ||||
-rw-r--r-- | llvm/lib/Bytecode/Writer/Writer.cpp | 182 | ||||
-rw-r--r-- | llvm/lib/Bytecode/Writer/WriterInternals.h | 74 |
5 files changed, 601 insertions, 0 deletions
diff --git a/llvm/lib/Bytecode/Writer/ConstantWriter.cpp b/llvm/lib/Bytecode/Writer/ConstantWriter.cpp new file mode 100644 index 00000000000..e0504a5b38f --- /dev/null +++ b/llvm/lib/Bytecode/Writer/ConstantWriter.cpp @@ -0,0 +1,154 @@ +//===-- WriteConst.cpp - Functions for writing constants ---------*- C++ -*--=// +// +// This file implements the routines for encoding constants to a bytecode +// stream. +// +// Note that the performance of this library is not terribly important, because +// it shouldn't be used by JIT type applications... so it is not a huge focus +// at least. :) +// +//===----------------------------------------------------------------------===// + +#include "WriterInternals.h" +#include "llvm/ConstPoolVals.h" +#include "llvm/SymbolTable.h" +#include "llvm/DerivedTypes.h" + +void BytecodeWriter::outputType(const Type *T) { + output_vbr((unsigned)T->getPrimitiveID(), Out); + + // That's all there is to handling primitive types... + if (T->isPrimitiveType()) + return; // We might do this if we alias a prim type: %x = type int + + switch (T->getPrimitiveID()) { // Handle derived types now. + case Type::MethodTyID: { + const MethodType *MT = (const MethodType*)T; + int Slot = Table.getValSlot(MT->getReturnType()); + assert(Slot != -1 && "Type used but not available!!"); + output_vbr((unsigned)Slot, Out); + + // Output all of the arguments... + MethodType::ParamTypes::const_iterator I = MT->getParamTypes().begin(); + for (; I != MT->getParamTypes().end(); I++) { + Slot = Table.getValSlot(*I); + assert(Slot != -1 && "Type used but not available!!"); + output_vbr((unsigned)Slot, Out); + } + + // Terminate list with VoidTy + output_vbr((unsigned)Type::VoidTy->getPrimitiveID(), Out); + break; + } + + case Type::ArrayTyID: { + const ArrayType *AT = (const ArrayType*)T; + int Slot = Table.getValSlot(AT->getElementType()); + assert(Slot != -1 && "Type used but not available!!"); + output_vbr((unsigned)Slot, Out); + //cerr << "Type slot = " << Slot << " Type = " << T->getName() << endl; + + output_vbr(AT->getNumElements(), Out); + break; + } + + case Type::StructTyID: { + const StructType *ST = (const StructType*)T; + + // Output all of the element types... + StructType::ElementTypes::const_iterator I = ST->getElementTypes().begin(); + for (; I != ST->getElementTypes().end(); I++) { + int Slot = Table.getValSlot(*I); + assert(Slot != -1 && "Type used but not available!!"); + output_vbr((unsigned)Slot, Out); + } + + // Terminate list with VoidTy + output_vbr((unsigned)Type::VoidTy->getPrimitiveID(), Out); + break; + } + + case Type::PointerTyID: { + const PointerType *PT = (const PointerType*)T; + int Slot = Table.getValSlot(PT->getValueType()); + assert(Slot != -1 && "Type used but not available!!"); + output_vbr((unsigned)Slot, Out); + break; + } + + case Type::ModuleTyID: + case Type::PackedTyID: + default: + cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" + << " Type '" << T->getName() << "'\n"; + break; + } +} + +bool BytecodeWriter::outputConstant(const ConstPoolVal *CPV) { + switch (CPV->getType()->getPrimitiveID()) { + case Type::BoolTyID: // Boolean Types + if (((const ConstPoolBool*)CPV)->getValue()) + output_vbr((unsigned)1, Out); + else + output_vbr((unsigned)0, Out); + break; + + case Type::UByteTyID: // Unsigned integer types... + case Type::UShortTyID: + case Type::UIntTyID: + case Type::ULongTyID: + output_vbr(((const ConstPoolUInt*)CPV)->getValue(), Out); + break; + + case Type::SByteTyID: // Signed integer types... + case Type::ShortTyID: + case Type::IntTyID: + case Type::LongTyID: + output_vbr(((const ConstPoolSInt*)CPV)->getValue(), Out); + break; + + case Type::TypeTyID: // Serialize type type + outputType(((const ConstPoolType*)CPV)->getValue()); + break; + + case Type::ArrayTyID: { + const ConstPoolArray *CPA = (const ConstPoolArray *)CPV; + unsigned size = CPA->getValues().size(); + if (!((const ArrayType *)CPA->getType())->isSized()) + output_vbr(size, Out); // Not for sized arrays!!! + + for (unsigned i = 0; i < size; i++) { + int Slot = Table.getValSlot(CPA->getValues()[i]); + assert(Slot != -1 && "Constant used but not available!!"); + output_vbr((unsigned)Slot, Out); + } + break; + } + + case Type::StructTyID: { + const ConstPoolStruct *CPS = (const ConstPoolStruct*)CPV; + const vector<ConstPoolUse> &Vals = CPS->getValues(); + + for (unsigned i = 0; i < Vals.size(); ++i) { + int Slot = Table.getValSlot(Vals[i]); + assert(Slot != -1 && "Constant used but not available!!"); + output_vbr((unsigned)Slot, Out); + } + break; + } + + case Type::FloatTyID: // Floating point types... + case Type::DoubleTyID: + // TODO: Floating point type serialization + + + case Type::VoidTyID: + case Type::LabelTyID: + default: + cerr << __FILE__ << ":" << __LINE__ << ": Don't know how to serialize" + << " type '" << CPV->getType()->getName() << "'\n"; + break; + } + return false; +} diff --git a/llvm/lib/Bytecode/Writer/InstructionWriter.cpp b/llvm/lib/Bytecode/Writer/InstructionWriter.cpp new file mode 100644 index 00000000000..c7c04efb731 --- /dev/null +++ b/llvm/lib/Bytecode/Writer/InstructionWriter.cpp @@ -0,0 +1,184 @@ +//===-- WriteInst.cpp - Functions for writing instructions -------*- C++ -*--=// +// +// This file implements the routines for encoding instruction opcodes to a +// bytecode stream. +// +// Note that the performance of this library is not terribly important, because +// it shouldn't be used by JIT type applications... so it is not a huge focus +// at least. :) +// +//===----------------------------------------------------------------------===// + +#include "WriterInternals.h" +#include "llvm/Module.h" +#include "llvm/Method.h" +#include "llvm/BasicBlock.h" +#include "llvm/Instruction.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Tools/DataTypes.h" +#include <algorithm> + +typedef unsigned char uchar; + +// outputInstructionFormat0 - Output those wierd instructions that have a large +// number of operands or have large operands themselves... +// +// Format: [opcode] [type] [numargs] [arg0] [arg1] ... [arg<numargs-1>] +// +static void outputInstructionFormat0(const Instruction *I, + const SlotCalculator &Table, + unsigned Type, vector<uchar> &Out) { + // Opcode must have top two bits clear... + output_vbr(I->getInstType(), Out); // Instruction Opcode ID + output_vbr(Type, Out); // Result type + + unsigned NumArgs; // Count the number of arguments to the instruction + for (NumArgs = 0; I->getOperand(NumArgs); NumArgs++) /*empty*/; + output_vbr(NumArgs, Out); + + for (unsigned i = 0; const Value *N = I->getOperand(i); i++) { + assert(i < NumArgs && "Count of arguments failed!"); + + int Slot = Table.getValSlot(N); + output_vbr((unsigned)Slot, Out); + } + align32(Out); // We must maintain correct alignment! +} + + +// outputInstructionFormat1 - Output one operand instructions, knowing that no +// operand index is >= 2^12. +// +static void outputInstructionFormat1(const Instruction *I, + const SlotCalculator &Table, int *Slots, + unsigned Type, vector<uchar> &Out) { + unsigned IType = I->getInstType(); // Instruction Opcode ID + + // bits Instruction format: + // -------------------------- + // 31-30: Opcode type, fixed to 1. + // 29-24: Opcode + // 23-12: Resulting type plane + // 11- 0: Operand #1 (if set to (2^12-1), then zero operands) + // + unsigned Opcode = (1 << 30) | (IType << 24) | (Type << 12) | Slots[0]; + // cerr << "1 " << IType << " " << Type << " " << Slots[0] << endl; + output(Opcode, Out); +} + + +// outputInstructionFormat2 - Output two operand instructions, knowing that no +// operand index is >= 2^8. +// +static void outputInstructionFormat2(const Instruction *I, + const SlotCalculator &Table, int *Slots, + unsigned Type, vector<uchar> &Out) { + unsigned IType = I->getInstType(); // Instruction Opcode ID + + // bits Instruction format: + // -------------------------- + // 31-30: Opcode type, fixed to 2. + // 29-24: Opcode + // 23-16: Resulting type plane + // 15- 8: Operand #1 + // 7- 0: Operand #2 + // + unsigned Opcode = (2 << 30) | (IType << 24) | (Type << 16) | + (Slots[0] << 8) | (Slots[1] << 0); + // cerr << "2 " << IType << " " << Type << " " << Slots[0] << " " + // << Slots[1] << endl; + output(Opcode, Out); +} + + +// outputInstructionFormat3 - Output three operand instructions, knowing that no +// operand index is >= 2^6. +// +static void outputInstructionFormat3(const Instruction *I, + const SlotCalculator &Table, int *Slots, + unsigned Type, vector<uchar> &Out) { + unsigned IType = I->getInstType(); // Instruction Opcode ID + + // bits Instruction format: + // -------------------------- + // 31-30: Opcode type, fixed to 3 + // 29-24: Opcode + // 23-18: Resulting type plane + // 17-12: Operand #1 + // 11- 6: Operand #2 + // 5- 0: Operand #3 + // + unsigned Opcode = (3 << 30) | (IType << 24) | (Type << 18) | + (Slots[0] << 12) | (Slots[1] << 6) | (Slots[2] << 0); + // cerr << "3 " << IType << " " << Type << " " << Slots[0] << " " + // << Slots[1] << " " << Slots[2] << endl; + output(Opcode, Out); +} + +bool BytecodeWriter::processInstruction(const Instruction *I) { + assert(I->getInstType() < 64 && "Opcode too big???"); + + unsigned NumOperands = 0; + int MaxOpSlot = 0; + int Slots[3]; Slots[0] = (1 << 12)-1; + + const Value *Def; + while ((Def = I->getOperand(NumOperands))) { + int slot = Table.getValSlot(Def); + assert(slot != -1 && "Broken bytecode!"); + if (slot > MaxOpSlot) MaxOpSlot = slot; + if (NumOperands < 3) Slots[NumOperands] = slot; + NumOperands++; + } + + // Figure out which type to encode with the instruction. Typically we want + // the type of the first parameter, as opposed to the type of the instruction + // (for example, with setcc, we always know it returns bool, but the type of + // the first param is actually interesting). But if we have no arguments + // we take the type of the instruction itself. + // + + const Type *Ty; + if (NumOperands) + Ty = I->getOperand(0)->getType(); + else + Ty = I->getType(); + + unsigned Type; + int Slot = Table.getValSlot(Ty); + assert(Slot != -1 && "Type not available!!?!"); + Type = (unsigned)Slot; + + + // Decide which instruction encoding to use. This is determined primarily by + // the number of operands, and secondarily by whether or not the max operand + // will fit into the instruction encoding. More operands == fewer bits per + // operand. + // + switch (NumOperands) { + case 0: + case 1: + if (MaxOpSlot < (1 << 12)-1) { // -1 because we use 4095 to indicate 0 ops + outputInstructionFormat1(I, Table, Slots, Type, Out); + return false; + } + break; + + case 2: + if (MaxOpSlot < (1 << 8)) { + outputInstructionFormat2(I, Table, Slots, Type, Out); + return false; + } + break; + + case 3: + if (MaxOpSlot < (1 << 6)) { + outputInstructionFormat3(I, Table, Slots, Type, Out); + return false; + } + break; + } + + outputInstructionFormat0(I, Table, Type, Out); + return false; +} diff --git a/llvm/lib/Bytecode/Writer/Makefile b/llvm/lib/Bytecode/Writer/Makefile new file mode 100644 index 00000000000..c03db561543 --- /dev/null +++ b/llvm/lib/Bytecode/Writer/Makefile @@ -0,0 +1,7 @@ + +LEVEL = ../../.. + +LIBRARYNAME = bcwriter + +include $(LEVEL)/Makefile.common + diff --git a/llvm/lib/Bytecode/Writer/Writer.cpp b/llvm/lib/Bytecode/Writer/Writer.cpp new file mode 100644 index 00000000000..d03c9454713 --- /dev/null +++ b/llvm/lib/Bytecode/Writer/Writer.cpp @@ -0,0 +1,182 @@ +//===-- Writer.cpp - Library for writing VM bytecode files -------*- C++ -*--=// +// +// This library implements the functionality defined in llvm/Bytecode/Writer.h +// +// This library uses the Analysis library to figure out offsets for +// variables in the method tables... +// +// Note that this file uses an unusual technique of outputting all the bytecode +// to a vector of unsigned char's, then copies the vector to an ostream. The +// reason for this is that we must do "seeking" in the stream to do back- +// patching, and some very important ostreams that we want to support (like +// pipes) do not support seeking. :( :( :( +// +// The choice of the vector data structure is influenced by the extremely fast +// "append" speed, plus the free "seek"/replace in the middle of the stream. +// +// Note that the performance of this library is not terribly important, because +// it shouldn't be used by JIT type applications... so it is not a huge focus +// at least. :) +// +//===----------------------------------------------------------------------===// + +#include "WriterInternals.h" +#include "llvm/Module.h" +#include "llvm/Method.h" +#include "llvm/BasicBlock.h" +#include "llvm/ConstPoolVals.h" +#include "llvm/SymbolTable.h" +#include "llvm/DerivedTypes.h" +#include <string.h> +#include <algorithm> + +BytecodeWriter::BytecodeWriter(vector<unsigned char> &o, const Module *M) + : Out(o), Table(M, false) { + + outputSignature(); + + // Emit the top level CLASS block. + BytecodeBlock ModuleBlock(BytecodeFormat::Module, Out); + + // Output largest ID of first "primitive" type: + output_vbr((unsigned)Type::FirstDerivedTyID, Out); + align32(Out); + + // Do the whole module now! + processModule(M); + + // If needed, output the symbol table for the class... + if (M->hasSymbolTable()) + outputSymbolTable(*M->getSymbolTable()); +} + +// TODO: REMOVE +#include "llvm/Assembly/Writer.h" + +bool BytecodeWriter::processConstPool(const ConstantPool &CP, bool isMethod) { + BytecodeBlock *CPool = new BytecodeBlock(BytecodeFormat::ConstantPool, Out); + + unsigned NumPlanes = Table.getNumPlanes(); + + for (unsigned pno = 0; pno < NumPlanes; pno++) { + const vector<const Value*> &Plane = Table.getPlane(pno); + if (Plane.empty()) continue; // Skip empty type planes... + + unsigned ValNo = 0; // Don't reemit module constants + if (isMethod) ValNo = Table.getModuleLevel(pno); + + unsigned NumConstants = 0; + for (unsigned vn = ValNo; vn < Plane.size(); vn++) + if (Plane[vn]->getValueType() == Value::ConstantVal) + NumConstants++; + + if (NumConstants == 0) continue; // Skip empty type planes... + + // Output type header: [num entries][type id number] + // + output_vbr(NumConstants, Out); + + // Output the Type ID Number... + int Slot = Table.getValSlot(Plane.front()->getType()); + assert (Slot != -1 && "Type in constant pool but not in method!!"); + output_vbr((unsigned)Slot, Out); + + //cerr << "NC: " << NumConstants << " Slot = " << hex << Slot << endl; + + for (; ValNo < Plane.size(); ValNo++) { + const Value *V = Plane[ValNo]; + if (V->getValueType() == Value::ConstantVal) { + //cerr << "Serializing value: <" << V->getType() << ">: " + // << ((const ConstPoolVal*)V)->getStrValue() << ":" + // << Out.size() << "\n"; + outputConstant((const ConstPoolVal*)V); + } + } + } + + delete CPool; // End bytecode block section! + + if (!isMethod) { // The ModuleInfoBlock follows directly after the c-pool + assert(CP.getParent()->getValueType() == Value::ModuleVal); + outputModuleInfoBlock((const Module*)CP.getParent()); + } + + return false; +} + +void BytecodeWriter::outputModuleInfoBlock(const Module *M) { + BytecodeBlock ModuleInfoBlock(BytecodeFormat::ModuleGlobalInfo, Out); + + // Output the types of the methods in this class + Module::MethodListType::const_iterator I = M->getMethodList().begin(); + while (I != M->getMethodList().end()) { + int Slot = Table.getValSlot((*I)->getType()); + assert(Slot != -1 && "Module const pool is broken!"); + assert(Slot >= Type::FirstDerivedTyID && "Derived type not in range!"); + output_vbr((unsigned)Slot, Out); + I++; + } + output_vbr((unsigned)Table.getValSlot(Type::VoidTy), Out); + align32(Out); +} + +bool BytecodeWriter::processMethod(const Method *M) { + BytecodeBlock MethodBlock(BytecodeFormat::Method, Out); + + Table.incorporateMethod(M); + + if (ModuleAnalyzer::processMethod(M)) return true; + + // If needed, output the symbol table for the method... + if (M->hasSymbolTable()) + outputSymbolTable(*M->getSymbolTable()); + + Table.purgeMethod(); + return false; +} + + +bool BytecodeWriter::processBasicBlock(const BasicBlock *BB) { + BytecodeBlock MethodBlock(BytecodeFormat::BasicBlock, Out); + return ModuleAnalyzer::processBasicBlock(BB); +} + +void BytecodeWriter::outputSymbolTable(const SymbolTable &MST) { + BytecodeBlock MethodBlock(BytecodeFormat::SymbolTable, Out); + + for (SymbolTable::const_iterator TI = MST.begin(); TI != MST.end(); TI++) { + SymbolTable::type_const_iterator I = MST.type_begin(TI->first); + SymbolTable::type_const_iterator End = MST.type_end(TI->first); + int Slot; + + if (I == End) continue; // Don't mess with an absent type... + + // Symtab block header: [num entries][type id number] + output_vbr(MST.type_size(TI->first), Out); + + Slot = Table.getValSlot(TI->first); + assert(Slot != -1 && "Type in symtab, but not in table!"); + output_vbr((unsigned)Slot, Out); + + for (; I != End; I++) { + // Symtab entry: [def slot #][name] + Slot = Table.getValSlot(I->second); + assert (Slot != -1 && "Value in symtab but not in method!!"); + output_vbr((unsigned)Slot, Out); + output(I->first, Out, false); // Don't force alignment... + } + } +} + +void WriteBytecodeToFile(const Module *C, ostream &Out) { + assert(C && "You can't write a null class!!"); + + vector<unsigned char> Buffer; + + // This object populates buffer for us... + BytecodeWriter BCW(Buffer, C); + + // Okay, write the vector out to the ostream now... + Out.write(&Buffer[0], Buffer.size()); + Out.flush(); +} diff --git a/llvm/lib/Bytecode/Writer/WriterInternals.h b/llvm/lib/Bytecode/Writer/WriterInternals.h new file mode 100644 index 00000000000..be9ccf96672 --- /dev/null +++ b/llvm/lib/Bytecode/Writer/WriterInternals.h @@ -0,0 +1,74 @@ +//===-- WriterInternals.h - Data structures shared by the Writer -*- C++ -*--=// +// +// This header defines the interface used between components of the bytecode +// writer. +// +// Note that the performance of this library is not terribly important, because +// it shouldn't be used by JIT type applications... so it is not a huge focus +// at least. :) +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_BYTECODE_WRITER_WRITERINTERNALS_H +#define LLVM_LIB_BYTECODE_WRITER_WRITERINTERNALS_H + +#include "llvm/Bytecode/Writer.h" +#include "llvm/Bytecode/Format.h" +#include "llvm/Bytecode/Primitives.h" +#include "llvm/Analysis/SlotCalculator.h" +#include "llvm/Tools/DataTypes.h" +#include "llvm/Instruction.h" + +class BytecodeWriter : public ModuleAnalyzer { + vector<unsigned char> &Out; + SlotCalculator Table; +public: + BytecodeWriter(vector<unsigned char> &o, const Module *M); + +protected: + virtual bool processConstPool(const ConstantPool &CP, bool isMethod); + virtual bool processMethod(const Method *M); + virtual bool processBasicBlock(const BasicBlock *BB); + virtual bool processInstruction(const Instruction *I); + +private : + inline void outputSignature() { + static const unsigned char *Sig = (const unsigned char*)"llvm"; + Out.insert(Out.end(), Sig, Sig+4); // output the bytecode signature... + } + + void outputModuleInfoBlock(const Module *C); + void outputSymbolTable(const SymbolTable &ST); + bool outputConstant(const ConstPoolVal *CPV); + void outputType(const Type *T); +}; + + + + +// BytecodeBlock - Little helper class that helps us do backpatching of bytecode +// block sizes really easily. It backpatches when it goes out of scope. +// +class BytecodeBlock { + unsigned Loc; + vector<unsigned char> &Out; + + BytecodeBlock(const BytecodeBlock &); // do not implement + void operator=(const BytecodeBlock &); // do not implement +public: + inline BytecodeBlock(unsigned ID, vector<unsigned char> &o) : Out(o) { + output(ID, Out); + output((unsigned)0, Out); // Reserve the space for the block size... + Loc = Out.size(); + } + + inline ~BytecodeBlock() { // Do backpatch when block goes out + // of scope... + // cerr << "OldLoc = " << Loc << " NewLoc = " << NewLoc << " diff = " << (NewLoc-Loc) << endl; + output((unsigned)(Out.size()-Loc), Out, (int)Loc-4); + align32(Out); // Blocks must ALWAYS be aligned + } +}; + + +#endif |