summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/NVPTX/CMakeLists.txt1
-rw-r--r--llvm/lib/Target/NVPTX/NVPTX.h1
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp115
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp9
4 files changed, 122 insertions, 4 deletions
diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt
index d48a7a9b1fc..99e950eba80 100644
--- a/llvm/lib/Target/NVPTX/CMakeLists.txt
+++ b/llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -21,6 +21,7 @@ set(NVPTXCodeGen_sources
NVPTXInstrInfo.cpp
NVPTXLowerAggrCopies.cpp
NVPTXLowerKernelArgs.cpp
+ NVPTXLowerAlloca.cpp
NVPTXMCExpr.cpp
NVPTXPrologEpilogPass.cpp
NVPTXRegisterInfo.cpp
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 477b0bac6ca..28ae3e8f393 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -70,6 +70,7 @@ MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
FunctionPass *createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM);
+BasicBlockPass *createNVPTXLowerAllocaPass();
bool isImageOrSamplerVal(const Value *, const Module *);
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
new file mode 100644
index 00000000000..93d0025d8f5
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -0,0 +1,115 @@
+//===-- NVPTXLowerAlloca.cpp - Make alloca to use local memory =====--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// For all alloca instructions, and add a pair of cast to local address for
+// each of them. For example,
+//
+// %A = alloca i32
+// store i32 0, i32* %A ; emits st.u32
+//
+// will be transformed to
+//
+// %A = alloca i32
+// %Local = addrspacecast i32* %A to i32 addrspace(5)*
+// %Generic = addrspacecast i32 addrspace(5)* %A to i32*
+// store i32 0, i32 addrspace(5)* %Generic ; emits st.local.u32
+//
+// And we will rely on NVPTXFavorNonGenericAddrSpace to combine the last
+// two instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeNVPTXLowerAllocaPass(PassRegistry &);
+}
+
+namespace {
+class NVPTXLowerAlloca : public BasicBlockPass {
+ bool runOnBasicBlock(BasicBlock &BB) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ NVPTXLowerAlloca() : BasicBlockPass(ID) {}
+ const char *getPassName() const override {
+ return "convert address space of alloca'ed memory to local";
+ }
+};
+} // namespace
+
+char NVPTXLowerAlloca::ID = 1;
+
+INITIALIZE_PASS(NVPTXLowerAlloca, "nvptx-lower-alloca",
+ "Lower Alloca", false, false)
+
+// =============================================================================
+// Main function for this pass.
+// =============================================================================
+bool NVPTXLowerAlloca::runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ for (auto &I : BB) {
+ if (auto allocaInst = dyn_cast<AllocaInst>(&I)) {
+ Changed = true;
+ auto PTy = dyn_cast<PointerType>(allocaInst->getType());
+ auto ETy = PTy->getElementType();
+ auto LocalAddrTy = PointerType::get(ETy, ADDRESS_SPACE_LOCAL);
+ auto NewASCToLocal = new AddrSpaceCastInst(allocaInst, LocalAddrTy, "");
+ auto GenericAddrTy = PointerType::get(ETy, ADDRESS_SPACE_GENERIC);
+ auto NewASCToGeneric = new AddrSpaceCastInst(NewASCToLocal,
+ GenericAddrTy, "");
+ NewASCToLocal->insertAfter(allocaInst);
+ NewASCToGeneric->insertAfter(NewASCToLocal);
+ for (Value::use_iterator UI = allocaInst->use_begin(),
+ UE = allocaInst->use_end();
+ UI != UE; ) {
+ // Check Load, Store, GEP, and BitCast Uses on alloca and make them
+ // use the converted generic address, in order to expose non-generic
+ // addrspacecast to NVPTXFavorNonGenericAddrSpace. For other types
+ // of instructions this is unecessary and may introduce redudant
+ // address cast.
+ const auto &AllocaUse = *UI++;
+ auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
+ if (LI && LI->getPointerOperand() == allocaInst && !LI->isVolatile()) {
+ LI->setOperand(LI->getPointerOperandIndex(), NewASCToGeneric);
+ continue;
+ }
+ auto SI = dyn_cast<StoreInst>(AllocaUse.getUser());
+ if (SI && SI->getPointerOperand() == allocaInst && !SI->isVolatile()) {
+ SI->setOperand(SI->getPointerOperandIndex(), NewASCToGeneric);
+ continue;
+ }
+ auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser());
+ if (GI && GI->getPointerOperand() == allocaInst) {
+ GI->setOperand(GI->getPointerOperandIndex(), NewASCToGeneric);
+ continue;
+ }
+ auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser());
+ if (BI && BI->getOperand(0) == allocaInst) {
+ BI->setOperand(0, NewASCToGeneric);
+ continue;
+ }
+ }
+ }
+ }
+ return Changed;
+}
+
+BasicBlockPass *llvm::createNVPTXLowerAllocaPass() {
+ return new NVPTXLowerAlloca();
+}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index e0b193ddf46..c071ee82abc 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -54,6 +54,7 @@ void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
+void initializeNVPTXLowerAllocaPass(PassRegistry &);
}
extern "C" void LLVMInitializeNVPTXTarget() {
@@ -70,6 +71,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
initializeNVPTXFavorNonGenericAddrSpacesPass(
*PassRegistry::getPassRegistry());
initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry());
+ initializeNVPTXLowerAllocaPass(*PassRegistry::getPassRegistry());
}
static std::string computeDataLayout(bool is64Bit) {
@@ -166,12 +168,11 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
- addPass(createNVPTXFavorNonGenericAddrSpacesPass());
// NVPTXLowerKernelArgs emits alloca for byval parameters which can often
- // be eliminated by SROA. We do not run SROA right after NVPTXLowerKernelArgs
- // because we plan to merge NVPTXLowerKernelArgs and
- // NVPTXFavorNonGenericAddrSpaces into one pass.
+ // be eliminated by SROA.
addPass(createSROAPass());
+ addPass(createNVPTXLowerAllocaPass());
+ addPass(createNVPTXFavorNonGenericAddrSpacesPass());
// FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave
// them unused. We could remove dead code in an ad-hoc manner, but that
// requires manual work and might be error-prone.
OpenPOWER on IntegriCloud