Set calling convention for CUDA kernel

This patch sets target specific calling convention for CUDA kernels in IR. Patch by Greg Rodgers. Revised and lit test added by Yaxun Liu. Differential Revision: https://reviews.llvm.org/D44747 llvm-svn: 328795
author: Yaxun Liu <Yaxun.Liu@amd.com> 2018-03-29 15:02:08 +0000
committer: Yaxun Liu <Yaxun.Liu@amd.com> 2018-03-29 15:02:08 +0000
commit: b2f2bb26e443d21d57cc1b5ebcfdd86b33417e4d (patch)
tree: 3ce92b4af5f431310dfa5dd6f08eb51567dbde05
parent: 7d0be9aff93c3d7f0332cd77d53c803b1512ee61 (diff)
download: bcm5719-llvm-b2f2bb26e443d21d57cc1b5ebcfdd86b33417e4d.tar.gz
bcm5719-llvm-b2f2bb26e443d21d57cc1b5ebcfdd86b33417e4d.zip
13 files changed, 92 insertions, 17 deletions
diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h
index 377534baab0..c5d2e26ecf2 100644
--- a/clang/include/clang/Basic/Specifiers.h
+++ b/clang/include/clang/Basic/Specifiers.h
@@ -231,23 +231,24 @@ namespace clang {
 
   /// \brief CallingConv - Specifies the calling convention that a function uses.
   enum CallingConv {
-    CC_C,           // __attribute__((cdecl))
-    CC_X86StdCall,  // __attribute__((stdcall))
-    CC_X86FastCall, // __attribute__((fastcall))
-    CC_X86ThisCall, // __attribute__((thiscall))
+    CC_C,             // __attribute__((cdecl))
+    CC_X86StdCall,    // __attribute__((stdcall))
+    CC_X86FastCall,   // __attribute__((fastcall))
+    CC_X86ThisCall,   // __attribute__((thiscall))
     CC_X86VectorCall, // __attribute__((vectorcall))
-    CC_X86Pascal,   // __attribute__((pascal))
-    CC_Win64,       // __attribute__((ms_abi))
-    CC_X86_64SysV,  // __attribute__((sysv_abi))
-    CC_X86RegCall, // __attribute__((regcall))
-    CC_AAPCS,       // __attribute__((pcs("aapcs")))
-    CC_AAPCS_VFP,   // __attribute__((pcs("aapcs-vfp")))
-    CC_IntelOclBicc, // __attribute__((intel_ocl_bicc))
-    CC_SpirFunction, // default for OpenCL functions on SPIR target
-    CC_OpenCLKernel, // inferred for OpenCL kernels
-    CC_Swift,        // __attribute__((swiftcall))
-    CC_PreserveMost, // __attribute__((preserve_most))
-    CC_PreserveAll,  // __attribute__((preserve_all))
+    CC_X86Pascal,     // __attribute__((pascal))
+    CC_Win64,         // __attribute__((ms_abi))
+    CC_X86_64SysV,    // __attribute__((sysv_abi))
+    CC_X86RegCall,    // __attribute__((regcall))
+    CC_AAPCS,         // __attribute__((pcs("aapcs")))
+    CC_AAPCS_VFP,     // __attribute__((pcs("aapcs-vfp")))
+    CC_IntelOclBicc,  // __attribute__((intel_ocl_bicc))
+    CC_SpirFunction,  // default for OpenCL functions on SPIR target
+    CC_OpenCLKernel,  // inferred for OpenCL kernels
+    CC_Swift,         // __attribute__((swiftcall))
+    CC_PreserveMost,  // __attribute__((preserve_most))
+    CC_PreserveAll,   // __attribute__((preserve_all))
+    CC_CUDAKernel,    // inferred for CUDA kernels
   };
 
   /// \brief Checks whether the given calling convention supports variadic
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index d42d705d090..0f22bf275cb 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -2628,6 +2628,7 @@ StringRef CXXNameMangler::getCallingConvQualifierName(CallingConv CC) {
   case CC_OpenCLKernel:
   case CC_PreserveMost:
   case CC_PreserveAll:
+  case CC_CUDAKernel:
     // FIXME: we should be mangling all of the above.
     return "";
 
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index d8b9967cdc8..021699f3be8 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -2752,6 +2752,7 @@ StringRef FunctionType::getNameForCallConv(CallingConv CC) {
   case CC_Swift: return "swiftcall";
   case CC_PreserveMost: return "preserve_most";
   case CC_PreserveAll: return "preserve_all";
+  case CC_CUDAKernel: return "cuda_kernel";
   }
 
   llvm_unreachable("Invalid calling convention.");
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index d46f8d3348e..fe67f4e3b27 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -780,6 +780,10 @@ void TypePrinter::printFunctionAfter(const FunctionType::ExtInfo &Info,
     case CC_OpenCLKernel:
       // Do nothing. These CCs are not available as attributes.
       break;
+    case CC_CUDAKernel:
+      // ToDo: print this before the function.
+      OS << " __global__";
+      break;
     case CC_Swift:
       OS << " __attribute__((swiftcall))";
       break;
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 01caadba92d..148a12eac84 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -64,6 +64,7 @@ unsigned CodeGenTypes::ClangCallConvToLLVMCallConv(CallingConv CC) {
   case CC_PreserveMost: return llvm::CallingConv::PreserveMost;
   case CC_PreserveAll: return llvm::CallingConv::PreserveAll;
   case CC_Swift: return llvm::CallingConv::Swift;
+  case CC_CUDAKernel: return CGM.getTargetCodeGenInfo().getCUDAKernelCallingConv();
   }
 }
 
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index 77d832cc45b..42c0210e1df 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -1022,6 +1022,9 @@ static unsigned getDwarfCC(CallingConv CC) {
     return llvm::dwarf::DW_CC_LLVM_PreserveAll;
   case CC_X86RegCall:
     return llvm::dwarf::DW_CC_LLVM_X86RegCall;
+  case CC_CUDAKernel:
+    // ToDo: Add llvm::dwarf::DW_CC_LLVM_CUDAKernel;
+    return 0;
   }
   return 0;
 }
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index b6f265a3579..02dd21483b4 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -431,6 +431,10 @@ unsigned TargetCodeGenInfo::getOpenCLKernelCallingConv() const {
   return llvm::CallingConv::SPIR_KERNEL;
 }
 
+unsigned TargetCodeGenInfo::getCUDAKernelCallingConv() const {
+  return llvm::CallingConv::C;
+}
+
 llvm::Constant *TargetCodeGenInfo::getNullPointer(const CodeGen::CodeGenModule &CGM,
     llvm::PointerType *T, QualType QT) const {
   return llvm::ConstantPointerNull::get(T);
@@ -7635,6 +7639,7 @@ public:
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &M) const override;
   unsigned getOpenCLKernelCallingConv() const override;
+  unsigned getCUDAKernelCallingConv() const override;
 
   llvm::Constant *getNullPointer(const CodeGen::CodeGenModule &CGM,
       llvm::PointerType *T, QualType QT) const override;
@@ -7722,6 +7727,10 @@ unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const {
   return llvm::CallingConv::AMDGPU_KERNEL;
 }
 
+unsigned AMDGPUTargetCodeGenInfo::getCUDAKernelCallingConv() const {
+  return llvm::CallingConv::AMDGPU_KERNEL;
+}
+
 // Currently LLVM assumes null pointers always have value 0,
 // which results in incorrectly transformed IR. Therefore, instead of
 // emitting null pointers in private and local address spaces, a null
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 533e14514ff..ab463c5dc6e 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -223,6 +223,9 @@ public:
   /// Get LLVM calling convention for OpenCL kernel.
   virtual unsigned getOpenCLKernelCallingConv() const;
 
+  /// Get LLVM calling convention for CUDA kernel.
+  virtual unsigned getCUDAKernelCallingConv() const;
+
   /// Get target specific null pointer.
   /// \param T is the LLVM type of the null pointer.
   /// \param QT is the clang QualType of the null pointer.
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index c0533f7cc7d..d87c21c0b17 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -25,6 +25,7 @@
 #include "clang/AST/ExprObjC.h"
 #include "clang/AST/ExprOpenMP.h"
 #include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/Type.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/Basic/PartialDiagnostic.h"
 #include "clang/Basic/SourceManager.h"
@@ -1657,6 +1658,16 @@ Sema::BuildDeclRefExpr(ValueDecl *D, QualType Ty, ExprValueKind VK,
       isa<VarDecl>(D) &&
       NeedToCaptureVariable(cast<VarDecl>(D), NameInfo.getLoc());
 
+  // Drop CUDA kernel calling convention since it is invisible to the user
+  // in DRE.
+  if (const auto *FT = Ty->getAs<FunctionType>()) {
+    if (FT->getCallConv() == CC_CUDAKernel) {
+      FT = Context.adjustFunctionType(FT,
+                                      FT->getExtInfo().withCallingConv(CC_C));
+      Ty = QualType(FT, Ty.getQualifiers().getAsOpaqueValue());
+    }
+  }
+
   DeclRefExpr *E;
   if (isa<VarTemplateSpecializationDecl>(D)) {
     VarTemplateSpecializationDecl *VarSpec =
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index dea1c2efe9d..e917bcc1cc2 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -1481,7 +1481,6 @@ bool Sema::IsFunctionConversion(QualType FromType, QualType ToType,
                  .getTypePtr());
       Changed = true;
     }
-
     // Convert FromFPT's ExtParameterInfo if necessary. The conversion is valid
     // only if the ExtParameterInfo lists of the two function prototypes can be
     // merged and the merged list is identical to ToFPT's ExtParameterInfo list.
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 00bb21ff307..7bcc5b66fb7 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -3316,6 +3316,18 @@ getCCForDeclaratorChunk(Sema &S, Declarator &D,
   CallingConv CC = S.Context.getDefaultCallingConvention(FTI.isVariadic,
                                                          IsCXXInstanceMethod);
 
+  // Attribute AT_CUDAGlobal affects the calling convention for AMDGPU targets.
+  // This is the simplest place to infer calling convention for CUDA kernels.
+  if (S.getLangOpts().CUDA && S.getLangOpts().CUDAIsDevice) {
+    for (const AttributeList *Attr = D.getDeclSpec().getAttributes().getList();
+         Attr; Attr = Attr->getNext()) {
+      if (Attr->getKind() == AttributeList::AT_CUDAGlobal) {
+        CC = CC_CUDAKernel;
+        break;
+      }
+    }
+  }
+
   // Attribute AT_OpenCLKernel affects the calling convention for SPIR
   // and AMDGPU targets, hence it cannot be treated as a calling
   // convention attribute. This is the simplest place to infer
diff --git a/clang/test/CodeGenCUDA/kernel-amdgcn.cu b/clang/test/CodeGenCUDA/kernel-amdgcn.cu
new file mode 100644
index 00000000000..a7369f901b9
--- /dev/null
+++ b/clang/test/CodeGenCUDA/kernel-amdgcn.cu
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
+#include "Inputs/cuda.h"
+
+// CHECK: define amdgpu_kernel void @_ZN1A6kernelEv
+class A {
+public:
+  static __global__ void kernel(){}
+};
+
+// CHECK: define void @_Z10non_kernelv
+__device__ void non_kernel(){}
+
+// CHECK: define amdgpu_kernel void @_Z6kerneli
+__global__ void kernel(int x) {
+  non_kernel();
+}
+
+// CHECK: define amdgpu_kernel void @_Z15template_kernelI1AEvT_
+template<class T>
+__global__ void template_kernel(T x) {}
+
+void launch(void *f);
+
+int main() {
+  launch((void*)A::kernel);
+  launch((void*)kernel);
+  launch((void*)template_kernel<A>);
+  return 0;
+}
diff --git a/clang/tools/libclang/CXType.cpp b/clang/tools/libclang/CXType.cpp
index dfc01524776..25bbde0c8a2 100644
--- a/clang/tools/libclang/CXType.cpp
+++ b/clang/tools/libclang/CXType.cpp
@@ -626,6 +626,7 @@ CXCallingConv clang_getFunctionTypeCallingConv(CXType X) {
       TCALLINGCONV(PreserveAll);
     case CC_SpirFunction: return CXCallingConv_Unexposed;
     case CC_OpenCLKernel: return CXCallingConv_Unexposed;
+    case CC_CUDAKernel: return CXCallingConv_Unexposed;
       break;
     }
 #undef TCALLINGCONV
author	Yaxun Liu <Yaxun.Liu@amd.com>	2018-03-29 15:02:08 +0000
committer	Yaxun Liu <Yaxun.Liu@amd.com>	2018-03-29 15:02:08 +0000
commit	b2f2bb26e443d21d57cc1b5ebcfdd86b33417e4d (patch)
tree	3ce92b4af5f431310dfa5dd6f08eb51567dbde05
parent	7d0be9aff93c3d7f0332cd77d53c803b1512ee61 (diff)
download	bcm5719-llvm-b2f2bb26e443d21d57cc1b5ebcfdd86b33417e4d.tar.gz bcm5719-llvm-b2f2bb26e443d21d57cc1b5ebcfdd86b33417e4d.zip