From 7e36390f24f6ceaea7bc2ba4adcd55d06cf73439 Mon Sep 17 00:00:00 2001 From: Yabin Hu Date: Thu, 29 Nov 2012 16:08:29 +0800 Subject: [PATCH] Add llvm.codegen intrinsic. The llvm.codegen intrinsic generates code for embedded LLVM-IR strings. Each call to the intrinsic is replaced by a pointer to the newly generated target code. The code generation target can be different to the one of the parent module. --- docs/LangRef.html | 36 +++ include/llvm/CodeGen/Passes.h | 3 + include/llvm/InitializePasses.h | 1 + include/llvm/Intrinsics.td | 4 + lib/CodeGen/CMakeLists.txt | 1 + lib/CodeGen/CodeGen.cpp | 1 + lib/CodeGen/CodeGenIntrinsic.cpp | 227 ++++++++++++++++++++ lib/CodeGen/Passes.cpp | 3 + lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 + lib/Target/LLVMBuild.txt | 2 +- lib/VMCore/Verifier.cpp | 10 + .../CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll | 28 +++ test/CodeGen/X86/EmbeddedCG/lit.local.cfg | 5 + 13 files changed, 322 insertions(+), 1 deletions(-) create mode 100644 lib/CodeGen/CodeGenIntrinsic.cpp create mode 100644 test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll create mode 100644 test/CodeGen/X86/EmbeddedCG/lit.local.cfg diff --git a/docs/LangRef.html b/docs/LangRef.html index cfc1c7d..eae069e 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -243,6 +243,7 @@
  • 'llvm.prefetch' Intrinsic
  • 'llvm.pcmarker' Intrinsic
  • 'llvm.readcyclecounter' Intrinsic
  • +
  • 'llvm.codegen' Intrinsic
  • Standard C Library Intrinsics @@ -7249,6 +7250,41 @@ LLVM.

    + +

    + 'llvm.codegen' Intrinsic +

    + +
    + +
    Syntax:
    +
    +  declare i8* @llvm.codegen(i8* <IRString>, i8* <MCPU>, i8* <
    +  Features>)
    +
    + +
    Overview:
    +

    The 'llvm.codegen' intrinsic uses the LLVM back ends to generate + code for embedded LLVM-IR strings. The code generation target can be + different to the one of the parent module.

    + +
    Arguments:
    +

    IRString is a string containing LLVM-IR.

    +

    MCPU is the name of the target CPU.

    +

    Features is the string representation of the additional target + features.

    + +
    Semantics:
    +

    The 'llvm.codegen' intrinsic transforms a string containing LLVM IR + to target assembly code. Calls to the intrinsic are replaced by a pointer to + the newly generated target code. In case LLVM can not generate code (e.g. the + target is not available), the call to the intrinsic is replaced by a i8 NULL + pointer.Users of this intrinsic should make sure the target triple is + properly set in the <IRString>. Inputs to both <MCPU> and + <Features> parameters can be null pointers.

    + +
    + diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index 44c9676..57b3aa2 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -432,6 +432,9 @@ namespace llvm { /// branch folding). extern char &GCMachineCodeAnalysisID; + /// CodeGenIntrinsic Pass - Create target code for embedded LLVM-IR strings. + FunctionPass *createCodeGenIntrinsicPass(); + /// Deleter Pass - Releases GC metadata. /// FunctionPass *createGCInfoDeleter(); diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index e06b892..fe8655e 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -93,6 +93,7 @@ void initializeCorrelatedValuePropagationPass(PassRegistry&); void initializeDAEPass(PassRegistry&); void initializeDAHPass(PassRegistry&); void initializeDCEPass(PassRegistry&); +void initializeCodeGenIntrinsicPass(PassRegistry&); void initializeDSEPass(PassRegistry&); void initializeDeadInstEliminationPass(PassRegistry&); void initializeDeadMachineInstructionElimPass(PassRegistry&); diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index d3a548c..a60d2bb 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -238,6 +238,10 @@ def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>; // guard to the correct place on the stack frame. def int_stackprotector : Intrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>; +//===----------------- Code Generation for Embedded LLVM-IR ---------------===// +def int_codegen : Intrinsic<[llvm_ptr_ty], + [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty]>; + //===------------------- Standard C Library Intrinsics --------------------===// // diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 7a20ff6..8e1ab9a 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMCodeGen CalcSpillWeights.cpp CallingConvLower.cpp CodeGen.cpp + CodeGenIntrinsic.cpp CodePlacementOpt.cpp CriticalAntiDepBreaker.cpp DeadMachineInstructionElim.cpp diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index a53f6f8..702ee18 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -21,6 +21,7 @@ using namespace llvm; void llvm::initializeCodeGen(PassRegistry &Registry) { initializeBranchFolderPassPass(Registry); initializeCalculateSpillWeightsPass(Registry); + initializeCodeGenIntrinsicPass(Registry); initializeCodePlacementOptPass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeEarlyIfConverterPass(Registry); diff --git a/lib/CodeGen/CodeGenIntrinsic.cpp b/lib/CodeGen/CodeGenIntrinsic.cpp new file mode 100644 index 0000000..cf8aa54 --- /dev/null +++ b/lib/CodeGen/CodeGenIntrinsic.cpp @@ -0,0 +1,227 @@ +//===-- CodeGenIntrinsic.cpp - CodeGen Intrinsic --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the llvm.codegen intrinsic. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CallingConv.h" +#include "llvm/IRBuilder.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Assembly/Parser.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/ADT/Triple.h" + +using namespace llvm; + +namespace { + /// ASMGenerator generates target-specific assembly code from LLVM IR. + class ASMGenerator { + public: + ASMGenerator() {} + + /// generate - Generates a target code string from a LLVM IR Value. + bool generate(Value *IRStr, Value *MCPUStr, Value *FeaturesStr, + std::string &ASM); + + private: + bool getStringFromConstantExpr(Value *ConstData, std::string &Out) const; + }; + + /// CodeGenIntrinsic - This pass replaces each call to the llvm.codegen + /// intrinsic with a string generated by ASMGenerator. + class CodeGenIntrinsic : public FunctionPass { + public: + static char ID; + + CodeGenIntrinsic(); + const char *getPassName() const; + virtual bool runOnFunction(Function &F); + }; +} + +// ----------------------------------------------------------------------------- +static bool getTargetMachineFromModule(Module *M, const StringRef &TripleStr, + const StringRef &MCPU, + const StringRef &Features, + TargetMachine *&TM) { + std::string ErrMsg; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg); + if (!TheTarget) { + errs() << ErrMsg << "\n"; + return false; + } + + TargetOptions Options; + TM = TheTarget->createTargetMachine(TripleStr, MCPU, Features, Options); + assert(TM && "Could not allocate target machine!"); + return true; +} + +static bool createASMAsString(Module *New, const StringRef &Triple, + const StringRef &MCPU, const StringRef &Features, + std::string &ASM) { + TargetMachine *Target; + if (!getTargetMachineFromModule(New, Triple, MCPU, Features, Target)) { + return false; + } + + // Build up all of the passes that we want to do to the module. + PassManager PM; + + // Get the data layout of the new module. If it is empty, return false. + const std::string &ModuleDataLayout = New->getDataLayout(); + if (ModuleDataLayout.empty()) + return false; + + { + raw_string_ostream NameROS(ASM); + formatted_raw_ostream FOS(NameROS); + + // Ask the target to add backend passes as necessary. + int UseVerifier = true; + if (Target->addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_AssemblyFile, + UseVerifier)) { + errs() << "CodeGen Intrinsic: target does not support generation of this " + << "file type!\n"; + + return false; + } + + PM.run(*New); + FOS.flush(); + } + + delete Target; + return true; +} + +bool ASMGenerator::getStringFromConstantExpr(Value *ConstData, + std::string &Out) const { + bool Result = false; + if (ConstantExpr *U = dyn_cast(ConstData)) { + Value *R = U->getOperand(0); + if (GlobalVariable *GV = dyn_cast(R)) { + Constant *C = GV->getInitializer(); + if (ConstantDataArray *CA = dyn_cast(C)) { + Out = CA->getAsString(); + Result = true; + } + } + } + return Result; +} + +bool ASMGenerator::generate(Value *IRStr, Value *MCPUStr, Value *FeaturesStr, + std::string &ASM) { + std::string Kernel; + if (!getStringFromConstantExpr(IRStr, Kernel)) + return false; + + std::string MCPU; + if (!getStringFromConstantExpr(MCPUStr, MCPU)) + MCPU = ""; + + std::string Features; + if (!getStringFromConstantExpr(FeaturesStr, Features)) + Features = ""; + + SMDiagnostic ErrorMessage; + LLVMContext Context; + std::auto_ptr TempModule( + ParseAssemblyString(Kernel.c_str(), 0, ErrorMessage, Context)); + + Triple TheTriple(TempModule->getTargetTriple()); + const std::string TripleStr = TheTriple.getTriple(); + if(TripleStr.empty()) { + errs() << "error: Target triple isn't set correctly for the new module.\n"; + return false; + } + + return createASMAsString(TempModule.get(), TripleStr.data(), MCPU.data(), + Features.data(), ASM); +} + +// ----------------------------------------------------------------------------- +INITIALIZE_PASS(CodeGenIntrinsic, "codegen-intrinsic", "CodeGen Intrinsic", + false, false) + +FunctionPass *llvm::createCodeGenIntrinsicPass() { + return new CodeGenIntrinsic(); +} + +char CodeGenIntrinsic::ID = 0; + +CodeGenIntrinsic::CodeGenIntrinsic() + : FunctionPass(ID) { +} + +const char *CodeGenIntrinsic::getPassName() const { + return "Lowering CodeGen Intrinsic."; +} + +bool CodeGenIntrinsic::runOnFunction(Function &F) { + bool MadeChange = false; + Module *M = F.getParent(); + if (Function *CG = M->getFunction("llvm.codegen")) { + for (Function::use_iterator I = CG->use_begin(), E = CG->use_end(); + I != E; ++I) { + if (CallInst *CI = dyn_cast(*I)) { + if (&F != CI->getParent()->getParent()) + continue; + + std::string ASM; + ASMGenerator *Generator = new ASMGenerator(); + IRBuilder<> Builder(CI->getParent(), CI); + Value *St; + if (!Generator->generate(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), ASM)) { + Type *Ty= CG->getReturnType(); + St = Constant::getNullValue(Ty); + } else { + // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? + // Seems that, short of multithreaded LLVM, it should be safe; + // all that is necessary is that a simple Module::iterator loop + // not be invalidated. Appending to the GlobalVariable list is + // safe in that sense. + // + // All the output passes emit globals last. The ExecutionEngine + // explicitly supports adding globals to the module after + // initialization. + // + // Still, if it isn't deemed acceptable, then this + // transformation needs to be a ModulePass (which means it + // cannot be in the 'llc' pipeline (which uses a + // FunctionPassManager (which segfaults (not asserts) if + // provided a ModulePass))). + St = Builder.CreateGlobalStringPtr(ASM, "ASM"); + } + CI->replaceAllUsesWith(St); + CI->eraseFromParent(); + // We should erase the unused globals from current module. But we + // can't do this within a FunctionPass. + MadeChange = true; + } + } + } + + return MadeChange; +} diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 526d994..1de0c63 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -369,6 +369,9 @@ void TargetPassConfig::addIRPasses() { addPass(createGCLoweringPass()); + // Generate target code for embedded LLVM-IR strings. + addPass(createCodeGenIntrinsicPass()); + // Make sure that no unreachable blocks are instruction selected. addPass(createUnreachableBlockEliminationPass()); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 56e774c..97006c0 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5169,6 +5169,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::donothing: // ignore return 0; + case Intrinsic::codegen: + llvm_unreachable("failed to lower codegen intrinsic!"); } } diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index eb6c779..a54f57c 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -45,7 +45,7 @@ parent = Libraries type = Library name = Target parent = Libraries -required_libraries = Core MC Support +required_libraries = Core MC Support AsmParser ; This is a special group whose required libraries are extended (by llvm-build) ; with every built target, which makes it easy for tools to include every diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 3782957..896772a 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -1952,6 +1952,16 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { Assert1(isa(CI.getArgOperand(1)), "llvm.invariant.end parameter #2 must be a constant integer", &CI); break; + case Intrinsic::codegen: + Assert1(isa(CI.getArgOperand(0)), + "llvm.codegen parameter #1 must be a constant expression", &CI); + Assert1(isa(CI.getArgOperand(1)) || + isa(CI.getArgOperand(1)), + "llvm.codegen parameter #2 must be a constant expression", &CI); + Assert1(isa(CI.getArgOperand(2)) || + isa(CI.getArgOperand(2)), + "llvm.codegen parameter #3 must be a constant expression", &CI); + break; } } diff --git a/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll b/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll new file mode 100644 index 0000000..73d34e1 --- /dev/null +++ b/test/CodeGen/X86/EmbeddedCG/embedded-codegen-ptx.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -march=x86 | FileCheck %s + +; ModuleID = 'embedded-codegen-ptx.ll' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128" +target triple = "i386-pc-linux-gnu" + +@llvm_kernel = private unnamed_addr constant [1940 x i8] c"target triple = \22nvptx-unknown-unknown\22\0A\0Adefine internal ptx_kernel void @gpu_codegen.ptx_subfn(i8* %ptx.Array) {\0Aptx.setup:\0A %0 = bitcast i8* %ptx.Array to [128 x [128 x i32]]*\0A %1 = call i32 @llvm.ptx.read.nctaid.x()\0A %2 = zext i32 %1 to i64\0A %3 = call i32 @llvm.ptx.read.nctaid.y()\0A %4 = zext i32 %3 to i64\0A %5 = call i32 @llvm.ptx.read.ntid.x()\0A %6 = zext i32 %5 to i64\0A %7 = call i32 @llvm.ptx.read.ntid.y()\0A %8 = zext i32 %7 to i64\0A %9 = call i32 @llvm.ptx.read.ctaid.x()\0A %10 = zext i32 %9 to i64\0A %11 = call i32 @llvm.ptx.read.ctaid.y()\0A %12 = zext i32 %11 to i64\0A %13 = call i32 @llvm.ptx.read.tid.x()\0A %14 = zext i32 %13 to i64\0A %15 = call i32 @llvm.ptx.read.tid.y()\0A %16 = zext i32 %15 to i64\0A br label %ptx.loop_body\0A\0Aptx.exit: ; preds = %polly.stmt.for.body3\0A ret void\0A\0Aptx.loop_body: ; preds = %ptx.setup\0A %p_gpu_index_i = mul i64 %12, %2\0A %17 = add i64 %p_gpu_index_i, %10\0A %p_gpu_index_j = mul i64 %16, %6\0A %18 = add i64 %p_gpu_index_j, %14\0A br label %polly.stmt.for.body3\0A\0Apolly.stmt.for.body3: ; preds = %ptx.loop_body\0A %19 = trunc i64 %17 to i32\0A %p_mul = shl nsw i32 %19, 7\0A %20 = trunc i64 %18 to i32\0A %p_add = add nsw i32 %p_mul, %20\0A %21 = trunc i64 %17 to i32\0A %22 = trunc i64 %18 to i32\0A %p_arrayidx4 = getelementptr inbounds [128 x [128 x i32]]* %0, i32 0, i32 %21, i32 %22\0A store i32 %p_add, i32* %p_arrayidx4\0A br label %ptx.exit\0A}\0A\0Adeclare i32 @llvm.ptx.read.nctaid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.nctaid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ctaid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ctaid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ntid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.ntid.y() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.tid.x() nounwind readnone\0A\0Adeclare i32 @llvm.ptx.read.tid.y() nounwind readnone\0A\00" + +@.str = private unnamed_addr constant [3 x i8] c"%s\00", align 1 + +define i32 @gpu_codegen() nounwind { +entry: + %0 = call i8* @llvm.codegen(i8* getelementptr inbounds ([1940 x i8]* @llvm_kernel, i32 0, i32 0), i8* null, i8* null) + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0), i8* %0) + ret i32 0 +} + +define i32 @main() nounwind { +entry: + %call = call i32 @gpu_codegen() + ret i32 0 +} + +declare i8* @llvm.codegen(i8*, i8*, i8*) nounwind + +declare i32 @printf(i8*, ...) nounwind + +; CHECK: gpu_codegen_2E_ptx_subfn diff --git a/test/CodeGen/X86/EmbeddedCG/lit.local.cfg b/test/CodeGen/X86/EmbeddedCG/lit.local.cfg new file mode 100644 index 0000000..7180c84 --- /dev/null +++ b/test/CodeGen/X86/EmbeddedCG/lit.local.cfg @@ -0,0 +1,5 @@ +config.suffixes = ['.ll', '.c', '.cpp'] + +targets = set(config.root.targets_to_build.split()) +if not 'NVPTX' in targets: + config.unsupported = True -- 1.7.6.5