diff options
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/alloca.ll | 12 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fence-barrier.ll | 35 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/invalid-alloca.ll | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir | 26 |
4 files changed, 59 insertions, 30 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/alloca.ll b/llvm/test/CodeGen/AMDGPU/alloca.ll new file mode 100644 index 00000000000..fd42e925f3a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/alloca.ll @@ -0,0 +1,12 @@ +; RUN: llvm-as -data-layout=A5 < %s | llvm-dis | FileCheck %s +; RUN: llc -mtriple amdgcn-amd-amdhsa-amdgiz < %s +; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple amdgcn-amd-amdhsa-amdgiz +; RUN: opt -data-layout=A5 -S < %s +; RUN: llvm-as -data-layout=A5 < %s | opt -S + +; CHECK: %tmp = alloca i32, addrspace(5) +define amdgpu_kernel void @test() { + %tmp = alloca i32, addrspace(5) + ret void +} + diff --git a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll index f140a762942..8c6c17e5e57 100644 --- a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-si-insert-waitcnts=1 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -enable-si-insert-waitcnts=1 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s +; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx803 -enable-si-insert-waitcnts=1 -verify-machineinstrs | FileCheck --check-prefix=GCN %s declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() declare i8 addrspace(2)* @llvm.amdgcn.implicitarg.ptr() @@ -16,8 +17,8 @@ declare void @llvm.amdgcn.s.barrier() ; GCN-NEXT: s_barrier ; GCN: flat_store_dword define amdgpu_kernel void @test_local(i32 addrspace(1)*) { - %2 = alloca i32 addrspace(1)*, align 4 - store i32 addrspace(1)* %0, i32 addrspace(1)** %2, align 4 + %2 = alloca i32 addrspace(1)*, align 4, addrspace(5) + store i32 addrspace(1)* %0, i32 addrspace(1)* addrspace(5)* %2, align 4 %3 = call i32 @llvm.amdgcn.workitem.id.x() %4 = zext i32 %3 to i64 %5 = icmp eq i64 %4, 0 @@ -32,7 +33,7 @@ define amdgpu_kernel void @test_local(i32 addrspace(1)*) { call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire %8 = load i32, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_local.temp, i64 0, i64 0), align 4 - %9 = load i32 addrspace(1)*, i32 addrspace(1)** %2, align 4 + %9 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4 %10 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() %11 = call i32 @llvm.amdgcn.workitem.id.x() %12 = call i32 @llvm.amdgcn.workgroup.id.x() @@ -58,14 +59,14 @@ define amdgpu_kernel void @test_local(i32 addrspace(1)*) { ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} ; GCN-NEXT: s_barrier define amdgpu_kernel void @test_global(i32 addrspace(1)*) { - %2 = alloca i32 addrspace(1)*, align 4 - %3 = alloca i32, align 4 - store i32 addrspace(1)* %0, i32 addrspace(1)** %2, align 4 - store i32 0, i32* %3, align 4 + %2 = alloca i32 addrspace(1)*, align 4, addrspace(5) + %3 = alloca i32, align 4, addrspace(5) + store i32 addrspace(1)* %0, i32 addrspace(1)* addrspace(5)* %2, align 4 + store i32 0, i32 addrspace(5)* %3, align 4 br label %4 ; <label>:4: ; preds = %58, %1 - %5 = load i32, i32* %3, align 4 + %5 = load i32, i32 addrspace(5)* %3, align 4 %6 = sext i32 %5 to i64 %7 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() %8 = call i32 @llvm.amdgcn.workitem.id.x() @@ -101,8 +102,8 @@ define amdgpu_kernel void @test_global(i32 addrspace(1)*) { %36 = add i64 %35, %33 %37 = add i64 %36, 2184 %38 = trunc i64 %37 to i32 - %39 = load i32 addrspace(1)*, i32 addrspace(1)** %2, align 4 - %40 = load i32, i32* %3, align 4 + %39 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4 + %40 = load i32, i32 addrspace(5)* %3, align 4 %41 = sext i32 %40 to i64 %42 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() %43 = call i32 @llvm.amdgcn.workitem.id.x() @@ -127,9 +128,9 @@ define amdgpu_kernel void @test_global(i32 addrspace(1)*) { br label %58 ; <label>:58: ; preds = %22 - %59 = load i32, i32* %3, align 4 + %59 = load i32, i32 addrspace(5)* %3, align 4 %60 = add nsw i32 %59, 1 - store i32 %60, i32* %3, align 4 + store i32 %60, i32 addrspace(5)* %3, align 4 br label %4 ; <label>:61: ; preds = %4 @@ -143,9 +144,9 @@ define amdgpu_kernel void @test_global(i32 addrspace(1)*) { ; GCN-NEXT: s_barrier ; GCN: flat_store_dword define amdgpu_kernel void @test_global_local(i32 addrspace(1)*) { - %2 = alloca i32 addrspace(1)*, align 4 - store i32 addrspace(1)* %0, i32 addrspace(1)** %2, align 4 - %3 = load i32 addrspace(1)*, i32 addrspace(1)** %2, align 4 + %2 = alloca i32 addrspace(1)*, align 4, addrspace(5) + store i32 addrspace(1)* %0, i32 addrspace(1)* addrspace(5)* %2, align 4 + %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4 %4 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() %5 = call i32 @llvm.amdgcn.workitem.id.x() %6 = call i32 @llvm.amdgcn.workgroup.id.x() @@ -176,7 +177,7 @@ define amdgpu_kernel void @test_global_local(i32 addrspace(1)*) { call void @llvm.amdgcn.s.barrier() fence syncscope("workgroup") acquire %24 = load i32, i32 addrspace(3)* getelementptr inbounds ([1 x i32], [1 x i32] addrspace(3)* @test_global_local.temp, i64 0, i64 0), align 4 - %25 = load i32 addrspace(1)*, i32 addrspace(1)** %2, align 4 + %25 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(5)* %2, align 4 %26 = call i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() %27 = call i32 @llvm.amdgcn.workitem.id.x() %28 = call i32 @llvm.amdgcn.workgroup.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/invalid-alloca.ll b/llvm/test/CodeGen/AMDGPU/invalid-alloca.ll new file mode 100644 index 00000000000..043ab46716b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/invalid-alloca.ll @@ -0,0 +1,16 @@ +; RUN: not llvm-as -data-layout=A5 < %s 2>&1 | FileCheck -check-prefixes=COMMON,AS %s +; RUN: not llc -mtriple amdgcn-amd-amdhsa-amdgiz < %s 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s +; RUN: llvm-as < %s | not llc -mtriple amdgcn-amd-amdhsa-amdgiz 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s +; RUN: not opt -data-layout=A5 -S < %s 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s +; RUN: llvm-as < %s | not opt -data-layout=A5 2>&1 | FileCheck -check-prefixes=COMMON,LLC %s + +; AS: assembly parsed, but does not verify as correct! +; COMMON: Allocation instruction pointer not in the stack address space! +; COMMON: %tmp = alloca i32 +; LLC: error: input module is broken! + +define amdgpu_kernel void @test() { + %tmp = alloca i32 + ret void +} + diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir index a89011a0cce..bf1fdca5a82 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=amdgcn-amd-amdhsa-opencl -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgizcl -verify-machineinstrs -run-pass=machine-scheduler -o - %s | FileCheck %s --- | %struct.widget.0 = type { float, i32, i32 } @@ -14,7 +14,7 @@ define amdgpu_kernel void @sched_dbg_value_crash(i8 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture readonly %arg1, %struct.widget.0 addrspace(1)* nocapture readonly %arg2, %struct.baz addrspace(1)* nocapture readonly %arg3, %struct.snork addrspace(1)* nocapture %arg4) local_unnamed_addr #2 { bb: %0 = getelementptr i32, i32 addrspace(1)* %arg1, i64 0, !amdgpu.uniform !3, !amdgpu.noclobber !3 - %tmp5 = alloca %struct.wombat, align 16 + %tmp5 = alloca %struct.wombat, align 16, addrspace(5) %1 = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() %2 = bitcast i8 addrspace(2)* %1 to i32 addrspace(2)* %3 = getelementptr inbounds i32, i32 addrspace(2)* %2, i64 1 @@ -48,9 +48,9 @@ %tmp20 = shl nuw nsw i64 %tmp19, 2 %tmp21 = getelementptr inbounds i8, i8 addrspace(1)* %arg, i64 %tmp20 %tmp22 = bitcast i8 addrspace(1)* %tmp21 to %struct.wombat.1 addrspace(1)* - %tmp23 = bitcast %struct.wombat* %tmp5 to i8* - call void @llvm.lifetime.start.p0i8(i64 144, i8* nonnull %tmp23) #3 - %tmp24 = getelementptr inbounds %struct.wombat, %struct.wombat* %tmp5, i32 0, i32 6 + %tmp23 = bitcast %struct.wombat addrspace(5)* %tmp5 to i8 addrspace(5)* + call void @llvm.lifetime.start.p5i8(i64 144, i8 addrspace(5)* nonnull %tmp23) #3 + %tmp24 = getelementptr inbounds %struct.wombat, %struct.wombat addrspace(5)* %tmp5, i32 0, i32 6 %tmp25 = getelementptr i32, i32 addrspace(1)* %arg1, i64 3, !amdgpu.uniform !3, !amdgpu.noclobber !3 %tmp26 = load i32, i32 addrspace(1)* %tmp25, align 4 %tmp27 = zext i32 %tmp26 to i64 @@ -103,7 +103,7 @@ %tmp74 = insertelement <4 x float> <float undef, float undef, float undef, float 0.000000e+00>, float %tmp69, i32 0 %tmp75 = insertelement <4 x float> %tmp74, float %tmp71, i32 1 %tmp76 = insertelement <4 x float> %tmp75, float %tmp73, i32 2 - store <4 x float> %tmp76, <4 x float>* %tmp24, align 16 + store <4 x float> %tmp76, <4 x float> addrspace(5)* %tmp24, align 16 %tmp77 = fsub float undef, %tmp60 %tmp78 = fsub float undef, %tmp61 %tmp79 = extractelement <4 x float> %tmp66, i32 2 @@ -125,26 +125,26 @@ %tmp94 = call float @llvm.fmuladd.f32(float %tmp92, float %tmp36, float %tmp93) %tmp95 = call float @llvm.fmuladd.f32(float %tmp48, float undef, float %tmp94) %tmp96 = fsub float extractelement (<2 x float> fadd (<2 x float> fmul (<2 x float> undef, <2 x float> undef), <2 x float> undef), i64 1), %tmp95 - %tmp97 = getelementptr inbounds %struct.wombat, %struct.wombat* %tmp5, i32 0, i32 8, i32 1 - call void @func(float %tmp96, i64 0, i16* nonnull %tmp97) #3 + %tmp97 = getelementptr inbounds %struct.wombat, %struct.wombat addrspace(5)* %tmp5, i32 0, i32 8, i32 1 + call void @func(float %tmp96, i64 0, i16 addrspace(5)* nonnull %tmp97) #3 %tmp984 = bitcast [16 x i8] addrspace(3)* %17 to i8 addrspace(3)* %tmp99 = getelementptr inbounds %struct.snork, %struct.snork addrspace(1)* %arg4, i64 %tmp12, i32 8, i32 1, i64 0 call void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* %tmp99, i8 addrspace(3)* %tmp984, i64 16, i32 16, i1 false) - call void @llvm.lifetime.end.p0i8(i64 144, i8* nonnull %tmp23) #3 + call void @llvm.lifetime.end.p5i8(i64 144, i8 addrspace(5)* nonnull %tmp23) #3 ret void } - declare void @func(float, i64, i16*) - declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0 + declare void @func(float, i64, i16 addrspace(5)*) + declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #0 declare float @llvm.fmuladd.f32(float, float, float) #1 - declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0 + declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #0 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1 declare i32 @llvm.amdgcn.workitem.id.x() #1 declare void @llvm.dbg.value(metadata, metadata, metadata) #1 declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #1 declare i32 @llvm.amdgcn.workitem.id.y() #1 declare i32 @llvm.amdgcn.workitem.id.z() #1 - declare void @llvm.memcpy.p1i8.p0i8.i64(i8 addrspace(1)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #0 + declare void @llvm.memcpy.p1i8.p5i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(5)* nocapture readonly, i64, i32, i1) #0 declare void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i64, i32, i1) #0 attributes #0 = { argmemonly nounwind } |