summaryrefslogtreecommitdiffstats
path: root/clang/lib/CodeGen/CGCUDABuiltin.cpp
diff options
context:
space:
mode:
authorJustin Lebar <jlebar@google.com>2016-01-28 23:58:28 +0000
committerJustin Lebar <jlebar@google.com>2016-01-28 23:58:28 +0000
commitc0e42750da5f4eaecb00ce46e4a5cae8e4cddc3d (patch)
tree920ac67d901e453e47d7aa7e796b546df4d3e846 /clang/lib/CodeGen/CGCUDABuiltin.cpp
parentbb04f6e28fe49046975297846548dc99e68c82b9 (diff)
downloadbcm5719-llvm-c0e42750da5f4eaecb00ce46e4a5cae8e4cddc3d.tar.gz
bcm5719-llvm-c0e42750da5f4eaecb00ce46e4a5cae8e4cddc3d.zip
[CUDA] Generate CUDA's printf alloca in its function's entry block.
Summary:
This is necessary to prevent llvm from generating stacksave intrinsics around this alloca. NVVM doesn't have a stack, and we don't handle said intrinsics.

Reviewers: rnk, echristo

Subscribers: cfe-commits, jhen, tra

Differential Revision: http://reviews.llvm.org/D16664

llvm-svn: 259122
Diffstat (limited to 'clang/lib/CodeGen/CGCUDABuiltin.cpp')
-rw-r--r--clang/lib/CodeGen/CGCUDABuiltin.cpp57
1 files changed, 18 insertions, 39 deletions
diff --git a/clang/lib/CodeGen/CGCUDABuiltin.cpp b/clang/lib/CodeGen/CGCUDABuiltin.cpp
index 6b9b97090a0..0ccba8982a6 100644
--- a/clang/lib/CodeGen/CGCUDABuiltin.cpp
+++ b/clang/lib/CodeGen/CGCUDABuiltin.cpp
@@ -52,10 +52,13 @@ static llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
//
// is converted into something resembling
//
-// char* buf = alloca(...);
-// *reinterpret_cast<Arg1*>(buf) = arg1;
-// *reinterpret_cast<Arg2*>(buf + ...) = arg2;
-// *reinterpret_cast<Arg3*>(buf + ...) = arg3;
+// struct Tmp {
+// Arg1 a1;
+// Arg2 a2;
+// Arg3 a3;
+// };
+// char* buf = alloca(sizeof(Tmp));
+// *(Tmp*)buf = {arg1, arg2, arg3};
// vprintf("format string", buf);
//
// buf is aligned to the max of {alignof(Arg1), ...}. Furthermore, each of the
@@ -80,48 +83,24 @@ CodeGenFunction::EmitCUDADevicePrintfCallExpr(const CallExpr *E,
E->arguments(), E->getDirectCallee(),
/* ParamsToSkip = */ 0);
- // Figure out how large of a buffer we need to hold our varargs and how
- // aligned the buffer needs to be. We start iterating at Arg[1], because
- // that's our first vararg.
- unsigned BufSize = 0;
- unsigned BufAlign = 0;
- for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {
- const RValue& RV = Args[I].RV;
- llvm::Type* Ty = RV.getScalarVal()->getType();
-
- auto Align = DL.getPrefTypeAlignment(Ty);
- BufAlign = std::max(BufAlign, Align);
- // Add padding required to keep the current arg aligned.
- BufSize = llvm::alignTo(BufSize, Align);
- BufSize += DL.getTypeAllocSize(Ty);
- }
-
- // Construct and fill the buffer.
- llvm::Value* BufferPtr = nullptr;
- if (BufSize == 0) {
+ // Construct and fill the args buffer that we'll pass to vprintf.
+ llvm::Value *BufferPtr;
+ if (Args.size() <= 1) {
// If there are no args, pass a null pointer to vprintf.
BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx));
} else {
- BufferPtr = Builder.Insert(new llvm::AllocaInst(
- llvm::Type::getInt8Ty(Ctx), llvm::ConstantInt::get(Int32Ty, BufSize),
- BufAlign, "printf_arg_buf"));
+ llvm::SmallVector<llvm::Type *, 8> ArgTypes;
+ for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I)
+ ArgTypes.push_back(Args[I].RV.getScalarVal()->getType());
+ llvm::Type *AllocaTy = llvm::StructType::create(ArgTypes, "printf_args");
+ llvm::Value *Alloca = CreateTempAlloca(AllocaTy);
- unsigned Offset = 0;
for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {
+ llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1);
llvm::Value *Arg = Args[I].RV.getScalarVal();
- llvm::Type *Ty = Arg->getType();
- auto Align = DL.getPrefTypeAlignment(Ty);
-
- // Pad the buffer to Arg's alignment.
- Offset = llvm::alignTo(Offset, Align);
-
- // Store Arg into the buffer at Offset.
- llvm::Value *GEP =
- Builder.CreateGEP(BufferPtr, llvm::ConstantInt::get(Int32Ty, Offset));
- llvm::Value *Cast = Builder.CreateBitCast(GEP, Ty->getPointerTo());
- Builder.CreateAlignedStore(Arg, Cast, Align);
- Offset += DL.getTypeAllocSize(Ty);
+ Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlignment(Arg->getType()));
}
+ BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx));
}
// Invoke vprintf and return.
OpenPOWER on IntegriCloud