Diffstat (limited to 'llvm/test')
 llvm/test/CodeGen/NVPTX/MachineSink-convergent.ll     |  4 ++--
 llvm/test/CodeGen/NVPTX/TailDuplication-convergent.ll |  4 ++--
 llvm/test/CodeGen/NVPTX/access-non-generic.ll         | 12 ++++++------
 llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll    |  8 ++++----
 llvm/test/Feature/intrinsic-noduplicate.ll            |  6 +++---
 llvm/test/Transforms/FunctionAttrs/convergent.ll      |  6 +++---
 6 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/llvm/test/CodeGen/NVPTX/MachineSink-convergent.ll b/llvm/test/CodeGen/NVPTX/MachineSink-convergent.ll
index c06fe224688..91c80182e2f 100644
--- a/llvm/test/CodeGen/NVPTX/MachineSink-convergent.ll
+++ b/llvm/test/CodeGen/NVPTX/MachineSink-convergent.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s | FileCheck %s
target triple = "nvptx64-nvidia-cuda"
-declare void @llvm.cuda.syncthreads()
+declare void @llvm.nvvm.barrier0()
; Load a value, then syncthreads. Branch, and use the loaded value only on one
; side of the branch. The load shouldn't be sunk beneath the call, because
@@ -11,7 +11,7 @@ Start:
; CHECK: ld.u32
%ptr_val = load i32, i32* %ptr
; CHECK: bar.sync
- call void @llvm.cuda.syncthreads()
+ call void @llvm.nvvm.barrier0()
br i1 %cond, label %L1, label %L2
L1:
%ptr_val2 = add i32 %ptr_val, 100
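The property this test pins down can be reproduced outside the patch with a minimal standalone module. In the sketch below (hypothetical function name, not part of the change), the loaded value feeds only one successor of the branch, which makes the load a tempting sinking candidate; the test's CHECK lines nevertheless require the ld.u32 to be emitted before the bar.sync.

; A minimal sketch of the guarded pattern, assuming the same triple.
target triple = "nvptx64-nvidia-cuda"

declare void @llvm.nvvm.barrier0()

define i32 @no_sink(i32* %ptr, i1 %cond) {
entry:
  ; Used only in %taken, but must not be sunk past the barrier call.
  %val = load i32, i32* %ptr
  call void @llvm.nvvm.barrier0()
  br i1 %cond, label %taken, label %nottaken
taken:
  ret i32 %val
nottaken:
  ret i32 0
}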
diff --git a/llvm/test/CodeGen/NVPTX/TailDuplication-convergent.ll b/llvm/test/CodeGen/NVPTX/TailDuplication-convergent.ll
index 02b562d85b9..fc6867eca41 100644
--- a/llvm/test/CodeGen/NVPTX/TailDuplication-convergent.ll
+++ b/llvm/test/CodeGen/NVPTX/TailDuplication-convergent.ll
@@ -2,7 +2,7 @@
target triple = "nvptx64-nvidia-cuda"
declare void @foo()
-declare void @llvm.cuda.syncthreads()
+declare void @llvm.nvvm.barrier0()
; syncthreads shouldn't be duplicated.
; CHECK: .func call_syncthreads
@@ -20,7 +20,7 @@ L2:
store i32 1, i32* %a
br label %L42
L42:
- call void @llvm.cuda.syncthreads()
+ call void @llvm.nvvm.barrier0()
br label %Ret
}
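What tail duplication would otherwise do here: the block containing the barrier (%L42 in the test) is reached from more than one predecessor, so the pass would normally clone the small block into each of them, and the barrier call would then appear twice in the emitted PTX. A stripped-down sketch of the same shape (hypothetical names, not part of the patch):

; Tail duplication must not clone %join into %left and %right,
; since that would create two barrier call sites.
target triple = "nvptx64-nvidia-cuda"

declare void @llvm.nvvm.barrier0()

define void @two_preds(i1 %c, i32* %a) {
entry:
  br i1 %c, label %left, label %right
left:
  store i32 0, i32* %a
  br label %join
right:
  store i32 1, i32* %a
  br label %join
join:
  call void @llvm.nvvm.barrier0()
  ret void
}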
diff --git a/llvm/test/CodeGen/NVPTX/access-non-generic.ll b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
index 8645ae612d4..3cd5a922508 100644
--- a/llvm/test/CodeGen/NVPTX/access-non-generic.ll
+++ b/llvm/test/CodeGen/NVPTX/access-non-generic.ll
@@ -34,7 +34,7 @@ define void @ld_st_shared_f32(i32 %i, float %v) {
store float %v, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
; PTX: st.shared.f32 [scalar], %f{{[0-9]+}};
; use syncthreads to disable optimizations across components
- call void @llvm.cuda.syncthreads()
+ call void @llvm.nvvm.barrier0()
; PTX: bar.sync 0;
; cast; load
@@ -45,7 +45,7 @@ define void @ld_st_shared_f32(i32 %i, float %v) {
; cast; store
store float %v, float* %2, align 4
; PTX: st.shared.f32 [scalar], %f{{[0-9]+}};
- call void @llvm.cuda.syncthreads()
+ call void @llvm.nvvm.barrier0()
; PTX: bar.sync 0;
; load gep cast
@@ -55,7 +55,7 @@ define void @ld_st_shared_f32(i32 %i, float %v) {
; store gep cast
store float %v, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
; PTX: st.shared.f32 [array+20], %f{{[0-9]+}};
- call void @llvm.cuda.syncthreads()
+ call void @llvm.nvvm.barrier0()
; PTX: bar.sync 0;
; gep cast; load
@@ -66,7 +66,7 @@ define void @ld_st_shared_f32(i32 %i, float %v) {
; gep cast; store
store float %v, float* %5, align 4
; PTX: st.shared.f32 [array+20], %f{{[0-9]+}};
- call void @llvm.cuda.syncthreads()
+ call void @llvm.nvvm.barrier0()
; PTX: bar.sync 0;
; cast; gep; load
@@ -78,7 +78,7 @@ define void @ld_st_shared_f32(i32 %i, float %v) {
; cast; gep; store
store float %v, float* %8, align 4
; PTX: st.shared.f32 [%{{(r|rl|rd)[0-9]+}}], %f{{[0-9]+}};
- call void @llvm.cuda.syncthreads()
+ call void @llvm.nvvm.barrier0()
; PTX: bar.sync 0;
ret void
@@ -181,7 +181,7 @@ exit:
ret void
}
-declare void @llvm.cuda.syncthreads() #3
+declare void @llvm.nvvm.barrier0() #3
declare void @use(float)
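For context on the pattern: @scalar and @array live in addrspace(3) (shared memory), and every access in the test goes through an addrspacecast to the generic address space. The PTX CHECK lines require the backend to fold those casts so the specialized st.shared/ld.shared forms are emitted instead of generic loads and stores; per the test's own comment, the barrier calls are only there to disable optimizations across the components. One such component, reduced to a sketch (hypothetical function name, not part of the patch):

; The cast to generic should be folded, so the expected PTX is an
; ld.shared.f32 from [scalar] rather than a generic ld.f32.
target triple = "nvptx64-nvidia-cuda"

@scalar = internal addrspace(3) global float 0.000000e+00, align 4

define float @ld_shared_scalar() {
  %v = load float, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
  ret float %v
}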
diff --git a/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll b/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
index 2fec31b3791..ca7fb6eddfe 100644
--- a/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
+++ b/llvm/test/CodeGen/NVPTX/noduplicate-syncthreads.ll
@@ -3,8 +3,8 @@
; Make sure the call to syncthreads is not duplicated here by the LLVM
; optimizations, because it has the noduplicate attribute set.
-; CHECK: call void @llvm.cuda.syncthreads
-; CHECK-NOT: call void @llvm.cuda.syncthreads
+; CHECK: call void @llvm.nvvm.barrier0
+; CHECK-NOT: call void @llvm.nvvm.barrier0
; Function Attrs: nounwind
define void @foo(float* %output) #1 {
@@ -37,7 +37,7 @@ if.else: ; preds = %entry
br label %if.end
if.end: ; preds = %if.else, %if.then
- call void @llvm.cuda.syncthreads()
+ call void @llvm.nvvm.barrier0()
%6 = load float*, float** %output.addr, align 8
%arrayidx6 = getelementptr inbounds float, float* %6, i64 0
%7 = load float, float* %arrayidx6, align 4
@@ -68,7 +68,7 @@ if.end17: ; preds = %if.else13, %if.then
}
; Function Attrs: noduplicate nounwind
-declare void @llvm.cuda.syncthreads() #2
+declare void @llvm.nvvm.barrier0() #2
!0 = !{void (float*)* @foo, !"kernel", i32 1}
!1 = !{null, !"align", i32 8}
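Two details are worth noting in this test. First, the CHECK / CHECK-NOT pair is the usual FileCheck idiom for "exactly one occurrence": the CHECK consumes the first call, and the CHECK-NOT then asserts that no further match appears in the output that follows. Second, the duplication being guarded against happens at the IR level, so it is the noduplicate attribute on the declaration (attribute group #2 above) doing the work, as in this sketch (not part of the patch):

; noduplicate is what stops IR passes such as jump threading or loop
; unrolling from cloning the call site into multiple copies.
declare void @llvm.nvvm.barrier0() #0

attributes #0 = { noduplicate nounwind }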
diff --git a/llvm/test/Feature/intrinsic-noduplicate.ll b/llvm/test/Feature/intrinsic-noduplicate.ll
index 370026223e8..4f2ae1c698c 100644
--- a/llvm/test/Feature/intrinsic-noduplicate.ll
+++ b/llvm/test/Feature/intrinsic-noduplicate.ll
@@ -1,9 +1,9 @@
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
; Make sure LLVM knows about the convergent attribute on the
-; llvm.cuda.syncthreads intrinsic.
+; llvm.nvvm.barrier0 intrinsic.
-declare void @llvm.cuda.syncthreads()
+declare void @llvm.nvvm.barrier0()
-; CHECK: declare void @llvm.cuda.syncthreads() #[[ATTRNUM:[0-9]+]]
+; CHECK: declare void @llvm.nvvm.barrier0() #[[ATTRNUM:[0-9]+]]
; CHECK: attributes #[[ATTRNUM]] = { convergent nounwind }
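The subtle part of this test is where the attributes come from: the declaration in the source spells none at all, yet the round trip through llvm-as and llvm-dis prints convergent nounwind. Intrinsic attributes are attached from the intrinsic's definition inside LLVM rather than taken from the input, which is why a bare declaration is enough to exercise them:

; A bare declaration: assembling it is expected to materialize the
; intrinsic's canonical attribute set without it being written here.
declare void @llvm.nvvm.barrier0()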
diff --git a/llvm/test/Transforms/FunctionAttrs/convergent.ll b/llvm/test/Transforms/FunctionAttrs/convergent.ll
index bc21d85ec22..37886b82b87 100644
--- a/llvm/test/Transforms/FunctionAttrs/convergent.ll
+++ b/llvm/test/Transforms/FunctionAttrs/convergent.ll
@@ -59,15 +59,15 @@ define i32 @indirect_non_convergent_call(i32 ()* %f) convergent norecurse {
; CHECK: Function Attrs
; CHECK-SAME: convergent
-; CHECK-NEXT: declare void @llvm.cuda.syncthreads()
-declare void @llvm.cuda.syncthreads() convergent
+; CHECK-NEXT: declare void @llvm.nvvm.barrier0()
+declare void @llvm.nvvm.barrier0() convergent
; CHECK: Function Attrs
; CHECK-SAME: convergent
; CHECK-NEXT: define i32 @intrinsic()
define i32 @intrinsic() convergent {
; Implicitly convergent, because the intrinsic is convergent.
- call void @llvm.cuda.syncthreads()
+ call void @llvm.nvvm.barrier0()
ret i32 0
}
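The direction of the inference matters here: FunctionAttrs never adds convergent, it only removes the attribute when it can prove it unnecessary, and a body containing a convergent call blocks that removal, which is why @intrinsic must keep it. A sketch of both outcomes (hypothetical function names, not part of the test), runnable through opt's FunctionAttrs pass:

; Calls a convergent intrinsic, so convergent cannot be removed.
declare void @llvm.nvvm.barrier0() convergent

define void @keeps_attr() convergent {
  call void @llvm.nvvm.barrier0()
  ret void
}

; Calls nothing convergent, so FunctionAttrs may strip the attribute.
define i32 @loses_attr() convergent {
  ret i32 42
}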