diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/include/llvm/IR/IntrinsicsNVVM.td | 18 |
| -rw-r--r-- | llvm/test/CodeGen/NVPTX/intrin-nocapture.ll | 21 |
2 files changed, 30 insertions, 9 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 1853c9988b4..6b853001e77 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -815,36 +815,36 @@ def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],  //   of pointer to another type of pointer, while the address space remains  //   the same.  def int_nvvm_ptr_local_to_gen: Intrinsic<[llvm_anyptr_ty], -                 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], +                 [llvm_anyptr_ty], [IntrNoMem],                   "llvm.nvvm.ptr.local.to.gen">;  def int_nvvm_ptr_shared_to_gen: Intrinsic<[llvm_anyptr_ty], -                 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], +                 [llvm_anyptr_ty], [IntrNoMem],                   "llvm.nvvm.ptr.shared.to.gen">;  def int_nvvm_ptr_global_to_gen: Intrinsic<[llvm_anyptr_ty], -                 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], +                 [llvm_anyptr_ty], [IntrNoMem],                   "llvm.nvvm.ptr.global.to.gen">;  def int_nvvm_ptr_constant_to_gen: Intrinsic<[llvm_anyptr_ty], -                 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], +                 [llvm_anyptr_ty], [IntrNoMem],                   "llvm.nvvm.ptr.constant.to.gen">;  def int_nvvm_ptr_gen_to_global: Intrinsic<[llvm_anyptr_ty], -                 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], +                 [llvm_anyptr_ty], [IntrNoMem],                   "llvm.nvvm.ptr.gen.to.global">;  def int_nvvm_ptr_gen_to_shared: Intrinsic<[llvm_anyptr_ty], -                 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], +                 [llvm_anyptr_ty], [IntrNoMem],                   "llvm.nvvm.ptr.gen.to.shared">;  def int_nvvm_ptr_gen_to_local: Intrinsic<[llvm_anyptr_ty], -                 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], +                 [llvm_anyptr_ty], [IntrNoMem],                   "llvm.nvvm.ptr.gen.to.local">;  def int_nvvm_ptr_gen_to_constant: 
Intrinsic<[llvm_anyptr_ty], -                 [llvm_anyptr_ty], [IntrNoMem, NoCapture<0>], +                 [llvm_anyptr_ty], [IntrNoMem],                   "llvm.nvvm.ptr.gen.to.constant">;  // Used in nvvm internally to help address space opt and ptx code generation  // This is for params that are passed to kernel functions by pointer by-val.  def int_nvvm_ptr_gen_to_param: Intrinsic<[llvm_anyptr_ty],                                       [llvm_anyptr_ty], -                                   [IntrNoMem, NoCapture<0>], +                                   [IntrNoMem],                                     "llvm.nvvm.ptr.gen.to.param">;  // Move intrinsics, used in nvvm internally diff --git a/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll b/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll new file mode 100644 index 00000000000..55781bb15a0 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/intrin-nocapture.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -O3 -S | FileCheck %s + +; Address space intrinsics were erroneously marked NoCapture, leading to bad +; optimizations (such as the store below being eliminated as dead code). This +; test makes sure we don't regress. + +declare void @foo(i32 addrspace(1)*) + +declare i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32*) + +; CHECK: @bar +define void @bar() { +  %t1 = alloca i32 +; CHECK: call i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32* %t1) +; CHECK-NEXT: store i32 10, i32* %t1 +  %t2 = call i32 addrspace(1)* @llvm.nvvm.ptr.gen.to.global.p1i32.p0i32(i32* %t1) +  store i32 10, i32* %t1 +  call void @foo(i32 addrspace(1)* %t2) +  ret void +} +  | 

