summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/AMDGPU
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/alloca.ll 12
-rw-r--r-- llvm/test/CodeGen/AMDGPU/fence-barrier.ll 35
-rw-r--r-- llvm/test/CodeGen/AMDGPU/invalid-alloca.ll 16
-rw-r--r-- llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir 26
4 files changed, 59 insertions, 30 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/alloca.ll b/llvm/test/CodeGen/AMDGPU/alloca.ll
new file mode 100644
index 00000000000..fd42e925f3a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/alloca.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as -data-layout=A5 < %s | llvm-dis | FileCheck %s
+; RUN: llc -mtriple amdgcn-amd-amdhsa-amdgiz < %s
+; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple amdgcn-amd-amdhsa-amdgiz
+; RUN: opt -data-layout=A5 -S < %s
+; RUN: llvm-as -data-layout=A5 < %s | opt -S
+
+; CHECK: %tmp = alloca i32, addrspace(5)
+define amdgpu_kernel void @test() {
+ %tmp = alloca i32, addrspace(5)
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll
index f140a762942..8c6c17e5e57 100644
--- a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-si-insert-waitcnts=1 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -enable-si-insert-waitcnts=1 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -enable-si-insert-waitcnts=1 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr()
@@ -16,8 +17,8 @@ declare void @llvm.amdgcn.s.barrier()
; GCN-NEXT: s_barrier
; GCN: flat_store_dword
define amdgpu_kernel void @test_local(i32 addrspace(1)*) {
- %2 = alloca i32 addrspace(1)*, align 4
- store i32 addrspace(1)* %0, i32 addrspace(1)** %2, align 4
+ %2 = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 addrspace(1)* %0, i32 addrspace(1)* addrspace(5)* %2, align 4
%3 = call i32 @llvm.amdgcn.workitem.id.x()
%4 = zext i32 %3 to i64
%5 = icmp eq i64 %4, 0
@@ -32,7 +33,7 @@ define amdgpu_kernel void @test_local(i32 addrspace(1)*) {
call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
%8 = load i32, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_local.temp, i64 0, i64 0), align 4
- %9 = load i32 addrspace(1)*, i32 addrspace(1)** %2, align 4
+ %9 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
%10 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%11 = call i32 @llvm.amdgcn.workitem.id.x()
%12 = call i32 @llvm.amdgcn.workgroup.id.x()
@@ -58,14 +59,14 @@ define amdgpu_kernel void @test_local(i32 addrspace(1)*) {
; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}}
; GCN-NEXT: s_barrier
define amdgpu_kernel void @test_global(i32 addrspace(1)*) {
- %2 = alloca i32 addrspace(1)*, align 4
- %3 = alloca i32, align 4
- store i32 addrspace(1)* %0, i32 addrspace(1)** %2, align 4
- store i32 0, i32* %3, align 4
+ %2 = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ %3 = alloca i32, align 4, addrspace(5)
+ store i32 addrspace(1)* %0, i32 addrspace(1)* addrspace(5)* %2, align 4
+ store i32 0, i32 addrspace(5)* %3, align 4
br label %4
; <label>:4: ; preds = %58, %1
- %5 = load i32, i32* %3, align 4
+ %5 = load i32, i32 addrspace(5)* %3, align 4
%6 = sext i32 %5 to i64
%7 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%8 = call i32 @llvm.amdgcn.workitem.id.x()
@@ -101,8 +102,8 @@ define amdgpu_kernel void @test_global(i32 addrspace(1)*) {
%36 = add i64 %35, %33
%37 = add i64 %36, 2184
%38 = trunc i64 %37 to i32
- %39 = load i32 addrspace(1)*, i32 addrspace(1)** %2, align 4
- %40 = load i32, i32* %3, align 4
+ %39 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
+ %40 = load i32, i32 addrspace(5)* %3, align 4
%41 = sext i32 %40 to i64
%42 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%43 = call i32 @llvm.amdgcn.workitem.id.x()
@@ -127,9 +128,9 @@ define amdgpu_kernel void @test_global(i32 addrspace(1)*) {
br label %58
; <label>:58: ; preds = %22
- %59 = load i32, i32* %3, align 4
+ %59 = load i32, i32 addrspace(5)* %3, align 4
%60 = add nsw i32 %59, 1
- store i32 %60, i32* %3, align 4
+ store i32 %60, i32 addrspace(5)* %3, align 4
br label %4
; <label>:61: ; preds = %4
@@ -143,9 +144,9 @@ define amdgpu_kernel void @test_global(i32 addrspace(1)*) {
; GCN-NEXT: s_barrier
; GCN: flat_store_dword
define amdgpu_kernel void @test_global_local(i32 addrspace(1)*) {
- %2 = alloca i32 addrspace(1)*, align 4
- store i32 addrspace(1)* %0, i32 addrspace(1)** %2, align 4
- %3 = load i32 addrspace(1)*, i32 addrspace(1)** %2, align 4
+ %2 = alloca i32 addrspace(1)*, align 4, addrspace(5)
+ store i32 addrspace(1)* %0, i32 addrspace(1)* addrspace(5)* %2, align 4
+ %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
%4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%5 = call i32 @llvm.amdgcn.workitem.id.x()
%6 = call i32 @llvm.amdgcn.workgroup.id.x()
@@ -176,7 +177,7 @@ define amdgpu_kernel void @test_global_local(i32 addrspace(1)*) {
call void @llvm.amdgcn.s.barrier()
fence syncscope("workgroup") acquire
%24 = load i32, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_global_local.temp, i64 0, i64 0), align 4
- %25 = load i32 addrspace(1)*, i32 addrspace(1)** %2, align 4
+ %25 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4
%26 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%27 = call i32 @llvm.amdgcn.workitem.id.x()
%28 = call i32 @llvm.amdgcn.workgroup.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/invalid-alloca.ll b/llvm/test/CodeGen/AMDGPU/invalid-alloca.ll
new file mode 100644
index 00000000000..043ab46716b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/invalid-alloca.ll
@@ -0,0 +1,16 @@
+; RUN: not llvm-as -data-layout=A5 < %s 2>&1 | FileCheck -check-prefixes=COMMON,AS %s
+; RUN: not llc -mtriple amdgcn-amd-amdhsa-amdgiz < %s 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s
+; RUN: llvm-as < %s | not llc -mtriple amdgcn-amd-amdhsa-amdgiz 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s
+; RUN: not opt -data-layout=A5 -S < %s 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s
+; RUN: llvm-as < %s | not opt -data-layout=A5 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s
+
+; AS: assembly parsed, but does not verify as correct!
+; COMMON: Allocation instruction pointer not in the stack address space!
+; COMMON: %tmp = alloca i32
+; LLC: error: input module is broken!
+
+define amdgpu_kernel void @test() {
+ %tmp = alloca i32
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index a89011a0cce..bf1fdca5a82 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=amdgcn-amd-amdhsa-opencl -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s
--- |
%struct.widget.0 = type { float, i32, i32 }
@@ -14,7 +14,7 @@
define amdgpu_kernel void @sched_dbg_value_crash(i8 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture readonly %arg1, %struct.widget.0 addrspace(1)* nocapture readonly %arg2, %struct.baz addrspace(1)* nocapture readonly %arg3, %struct.snork addrspace(1)* nocapture %arg4) local_unnamed_addr #2 {
bb:
%0 = getelementptr i32, i32 addrspace(1)* %arg1, i64 0, !amdgpu.uniform !3, !amdgpu.noclobber !3
- %tmp5 = alloca %struct.wombat, align 16
+ %tmp5 = alloca %struct.wombat, align 16, addrspace(5)
%1 = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
%2 = bitcast i8 addrspace(2)* %1 to i32 addrspace(2)*
%3 = getelementptr inbounds i32, i32 addrspace(2)* %2, i64 1
@@ -48,9 +48,9 @@
%tmp20 = shl nuw nsw i64 %tmp19, 2
%tmp21 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 %tmp20
%tmp22 = bitcast i8 addrspace(1)* %tmp21 to %struct.wombat.1 addrspace(1)*
- %tmp23 = bitcast %struct.wombat* %tmp5 to i8*
- call void @llvm.lifetime.start.p0i8(i64 144, i8* nonnull %tmp23) #3
- %tmp24 = getelementptr inbounds %struct.wombat, %struct.wombat* %tmp5, i32 0, i32 6
+ %tmp23 = bitcast %struct.wombat addrspace(5)* %tmp5 to i8 addrspace(5)*
+ call void @llvm.lifetime.start.p5i8(i64 144, i8 addrspace(5)* nonnull %tmp23) #3
+ %tmp24 = getelementptr inbounds %struct.wombat, %struct.wombat addrspace(5)* %tmp5, i32 0, i32 6
%tmp25 = getelementptr i32, i32 addrspace(1)* %arg1, i64 3, !amdgpu.uniform !3, !amdgpu.noclobber !3
%tmp26 = load i32, i32 addrspace(1)* %tmp25, align 4
%tmp27 = zext i32 %tmp26 to i64
@@ -103,7 +103,7 @@
%tmp74 = insertelement <4 x float> <float undef, float undef, float undef, float 0.000000e+00>, float %tmp69, i32 0
%tmp75 = insertelement <4 x float> %tmp74, float %tmp71, i32 1
%tmp76 = insertelement <4 x float> %tmp75, float %tmp73, i32 2
- store <4 x float> %tmp76, <4 x float>* %tmp24, align 16
+ store <4 x float> %tmp76, <4 x float> addrspace(5)* %tmp24, align 16
%tmp77 = fsub float undef, %tmp60
%tmp78 = fsub float undef, %tmp61
%tmp79 = extractelement <4 x float> %tmp66, i32 2
@@ -125,26 +125,26 @@
%tmp94 = call float @llvm.fmuladd.f32(float %tmp92, float %tmp36, float %tmp93)
%tmp95 = call float @llvm.fmuladd.f32(float %tmp48, float undef, float %tmp94)
%tmp96 = fsub float extractelement (<2 x float> fadd (<2 x float> fmul (<2 x float> undef, <2 x float> undef), <2 x float> undef), i64 1), %tmp95
- %tmp97 = getelementptr inbounds %struct.wombat, %struct.wombat* %tmp5, i32 0, i32 8, i32 1
- call void @func(float %tmp96, i64 0, i16* nonnull %tmp97) #3
+ %tmp97 = getelementptr inbounds %struct.wombat, %struct.wombat addrspace(5)* %tmp5, i32 0, i32 8, i32 1
+ call void @func(float %tmp96, i64 0, i16 addrspace(5)* nonnull %tmp97) #3
%tmp984 = bitcast [16 x i8] addrspace(3)* %17 to i8 addrspace(3)*
%tmp99 = getelementptr inbounds %struct.snork, %struct.snork addrspace(1)* %arg4, i64 %tmp12, i32 8, i32 1, i64 0
call void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* %tmp99, i8 addrspace(3)* %tmp984, i64 16, i32 16, i1 false)
- call void @llvm.lifetime.end.p0i8(i64 144, i8* nonnull %tmp23) #3
+ call void @llvm.lifetime.end.p5i8(i64 144, i8 addrspace(5)* nonnull %tmp23) #3
ret void
}
- declare void @func(float, i64, i16*)
- declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0
+ declare void @func(float, i64, i16 addrspace(5)*)
+ declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #0
declare float @llvm.fmuladd.f32(float, float, float) #1
- declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0
+ declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #0
declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1
declare i32 @llvm.amdgcn.workitem.id.y() #1
declare i32 @llvm.amdgcn.workitem.id.z() #1
- declare void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #0
+ declare void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i32, i1) #0
declare void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i64, i32, i1) #0
attributes #0 = { argmemonly nounwind }
OpenPOWER on IntegriCloud