diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/NVPTX/CMakeLists.txt | 1 | ||||
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTX.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp | 115 | ||||
-rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 9 |
4 files changed, 122 insertions, 4 deletions
diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt index d48a7a9b1fc..99e950eba80 100644 --- a/llvm/lib/Target/NVPTX/CMakeLists.txt +++ b/llvm/lib/Target/NVPTX/CMakeLists.txt @@ -21,6 +21,7 @@ set(NVPTXCodeGen_sources NVPTXInstrInfo.cpp NVPTXLowerAggrCopies.cpp NVPTXLowerKernelArgs.cpp + NVPTXLowerAlloca.cpp NVPTXMCExpr.cpp NVPTXPrologEpilogPass.cpp NVPTXRegisterInfo.cpp diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h index 477b0bac6ca..28ae3e8f393 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.h +++ b/llvm/lib/Target/NVPTX/NVPTX.h @@ -70,6 +70,7 @@ MachineFunctionPass *createNVPTXPrologEpilogPass(); MachineFunctionPass *createNVPTXReplaceImageHandlesPass(); FunctionPass *createNVPTXImageOptimizerPass(); FunctionPass *createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM); +BasicBlockPass *createNVPTXLowerAllocaPass(); bool isImageOrSamplerVal(const Value *, const Module *); diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp new file mode 100644 index 00000000000..93d0025d8f5 --- /dev/null +++ b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp @@ -0,0 +1,115 @@ +//===-- NVPTXLowerAlloca.cpp - Make alloca to use local memory =====--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// For all alloca instructions, and add a pair of cast to local address for +// each of them. For example, +// +// %A = alloca i32 +// store i32 0, i32* %A ; emits st.u32 +// +// will be transformed to +// +// %A = alloca i32 +// %Local = addrspacecast i32* %A to i32 addrspace(5)* +// %Generic = addrspacecast i32 addrspace(5)* %A to i32* +// store i32 0, i32 addrspace(5)* %Generic ; emits st.local.u32 +// +// And we will rely on NVPTXFavorNonGenericAddrSpace to combine the last +// two instructions. +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXUtilities.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" + +using namespace llvm; + +namespace llvm { +void initializeNVPTXLowerAllocaPass(PassRegistry &); +} + +namespace { +class NVPTXLowerAlloca : public BasicBlockPass { + bool runOnBasicBlock(BasicBlock &BB) override; + +public: + static char ID; // Pass identification, replacement for typeid + NVPTXLowerAlloca() : BasicBlockPass(ID) {} + const char *getPassName() const override { + return "convert address space of alloca'ed memory to local"; + } +}; +} // namespace + +char NVPTXLowerAlloca::ID = 1; + +INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca", + "Lower Alloca", false, false) + +// ============================================================================= +// Main function for this pass. +// ============================================================================= +bool NVPTXLowerAlloca::runOnBasicBlock(BasicBlock &BB) { + bool Changed = false; + for (auto &I : BB) { + if (auto allocaInst = dyn_cast<AllocaInst>(&I)) { + Changed = true; + auto PTy = dyn_cast<PointerType>(allocaInst->getType()); + auto ETy = PTy->getElementType(); + auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL); + auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, ""); + auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC); + auto NewASCToGeneric = new AddrSpaceCastInst(NewASCToLocal, + GenericAddrTy, ""); + NewASCToLocal->insertAfter(allocaInst); + NewASCToGeneric->insertAfter(NewASCToLocal); + for (Value::use_iterator UI = allocaInst->use_begin(), + UE = allocaInst->use_end(); + UI != UE; ) { + // Check Load, Store, GEP, and BitCast Uses on alloca and make them + // use the converted generic address, in order to expose non-generic + // addrspacecast to NVPTXFavorNonGenericAddrSpace. For other types + // of instructions this is unecessary and may introduce redudant + // address cast. + const auto &AllocaUse = *UI++; + auto LI = dyn_cast<LoadInst>(AllocaUse.getUser()); + if (LI && LI->getPointerOperand() == allocaInst && !LI->isVolatile()) { + LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric); + continue; + } + auto SI = dyn_cast<StoreInst>(AllocaUse.getUser()); + if (SI && SI->getPointerOperand() == allocaInst && !SI->isVolatile()) { + SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric); + continue; + } + auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser()); + if (GI && GI->getPointerOperand() == allocaInst) { + GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric); + continue; + } + auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser()); + if (BI && BI->getOperand(0) == allocaInst) { + BI->setOperand(0, NewASCToGeneric); + continue; + } + } + } + } + return Changed; +} + +BasicBlockPass *llvm::createNVPTXLowerAllocaPass() { + return new NVPTXLowerAlloca(); +} diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp index e0b193ddf46..c071ee82abc 100644 --- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -54,6 +54,7 @@ void initializeNVPTXAllocaHoistingPass(PassRegistry &); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); void initializeNVPTXLowerKernelArgsPass(PassRegistry &); +void initializeNVPTXLowerAllocaPass(PassRegistry &); } extern "C" void LLVMInitializeNVPTXTarget() { @@ -70,6 +71,7 @@ extern "C" void LLVMInitializeNVPTXTarget() { initializeNVPTXFavorNonGenericAddrSpacesPass( *PassRegistry::getPassRegistry()); initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry()); + initializeNVPTXLowerAllocaPass(*PassRegistry::getPassRegistry()); } static std::string computeDataLayout(bool is64Bit) { @@ -166,12 +168,11 @@ void NVPTXPassConfig::addIRPasses() { addPass(createNVPTXAssignValidGlobalNamesPass()); addPass(createGenericToNVVMPass()); addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine())); - addPass(createNVPTXFavorNonGenericAddrSpacesPass()); // NVPTXLowerKernelArgs emits alloca for byval parameters which can often - // be eliminated by SROA. We do not run SROA right after NVPTXLowerKernelArgs - // because we plan to merge NVPTXLowerKernelArgs and - // NVPTXFavorNonGenericAddrSpaces into one pass. + // be eliminated by SROA. addPass(createSROAPass()); + addPass(createNVPTXLowerAllocaPass()); + addPass(createNVPTXFavorNonGenericAddrSpacesPass()); // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave // them unused. We could remove dead code in an ad-hoc manner, but that // requires manual work and might be error-prone. |