diff options
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/NVPTX/bug21465.ll | 11 | ||||
-rw-r--r-- | llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll | 7 | ||||
-rw-r--r-- | llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll | 20 | ||||
-rw-r--r-- | llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/NVPTX/surf-read-cuda.ll | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/NVPTX/tex-read-cuda.ll | 8 |
6 files changed, 44 insertions, 18 deletions
diff --git a/llvm/test/CodeGen/NVPTX/bug21465.ll b/llvm/test/CodeGen/NVPTX/bug21465.ll index 76af386c651..c375cf8d580 100644 --- a/llvm/test/CodeGen/NVPTX/bug21465.ll +++ b/llvm/test/CodeGen/NVPTX/bug21465.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -nvptx-lower-struct-args -S | FileCheck %s +; RUN: opt < %s -nvptx-lower-kernel-args -S | FileCheck %s +; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefix PTX target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" target triple = "nvptx64-unknown-unknown" @@ -8,11 +9,13 @@ target triple = "nvptx64-unknown-unknown" ; Function Attrs: nounwind define void @_Z11TakesStruct1SPi(%struct.S* byval nocapture readonly %input, i32* nocapture %output) #0 { entry: -; CHECK-LABEL @_Z22TakesStruct1SPi -; CHECK: bitcast %struct.S* %input to i8* -; CHECK: call i8 addrspace(101)* @llvm.nvvm.ptr.gen.to.param.p101i8.p0i8 +; CHECK-LABEL: @_Z11TakesStruct1SPi +; PTX-LABEL: .visible .entry _Z11TakesStruct1SPi( +; CHECK: addrspacecast %struct.S* %input to %struct.S addrspace(101)* %b = getelementptr inbounds %struct.S, %struct.S* %input, i64 0, i32 1 %0 = load i32, i32* %b, align 4 +; PTX: ld.param.u32 %r{{[0-9]+}}, {{\[}}[[BASE:%rd[0-9]+]]{{\]}} +; PTX-NEXT: ld.param.u32 %r{{[0-9]+}}, {{\[}}[[BASE]]+4{{\]}} store i32 %0, i32* %output, align 4 ret void } diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll index 58b19112991..c70670da13d 100644 --- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll +++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll @@ -24,7 +24,10 @@ entry: ; CHECK: cvta.local.u64 %SP, %rd[[BUF_REG]] ; CHECK: ld.param.u64 %rd[[A_REG:[0-9]+]], [kernel_func_param_0] -; CHECK: ld.f32 %f[[A0_REG:[0-9]+]], [%rd[[A_REG]]] +; CHECK: cvta.to.global.u64 %rd[[A1_REG:[0-9]+]], %rd[[A_REG]] +; FIXME: casting A1_REG to A2_REG is unnecessary; A2_REG is essentially A_REG +; CHECK: cvta.global.u64 %rd[[A2_REG:[0-9]+]], %rd[[A1_REG]] +; CHECK: ld.global.f32 %f[[A0_REG:[0-9]+]], [%rd[[A1_REG]]] ; CHECK: st.f32 [%SP+0], %f[[A0_REG]] %0 = load float, float* %a, align 4 @@ -48,7 +51,7 @@ entry: ; CHECK: add.u64 %rd[[SP_REG:[0-9]+]], %SP, 0 ; CHECK: .param .b64 param0; -; CHECK-NEXT: st.param.b64 [param0+0], %rd[[A_REG]] +; CHECK-NEXT: st.param.b64 [param0+0], %rd[[A2_REG]] ; CHECK-NEXT: .param .b64 param1; ; CHECK-NEXT: st.param.b64 [param1+0], %rd[[SP_REG]] ; CHECK-NEXT: call.uni diff --git a/llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll b/llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll new file mode 100644 index 00000000000..53220bd905b --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/lower-kernel-ptr-arg.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" +target triple = "nvptx64-unknown-unknown" + +; Verify that both %input and %output are converted to global pointers and then +; addrspacecast'ed back to the original type. +define void @kernel(float* %input, float* %output) { +; CHECK-LABEL: .visible .entry kernel( +; CHECK: cvta.to.global.u64 +; CHECK: cvta.to.global.u64 + %1 = load float, float* %input, align 4 +; CHECK: ld.global.f32 + store float %1, float* %output, align 4 +; CHECK: st.global.f32 + ret void +} + +!nvvm.annotations = !{!0} +!0 = !{void (float*, float*)* @kernel, !"kernel", i32 1} diff --git a/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll b/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll index d4f7c3bd210..934df30a3a7 100644 --- a/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll +++ b/llvm/test/CodeGen/NVPTX/pr13291-i1-store.ll @@ -3,19 +3,19 @@ define ptx_kernel void @t1(i1* %a) { ; PTX32: mov.u16 %rs{{[0-9]+}}, 0; -; PTX32-NEXT: st.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}; +; PTX32-NEXT: st.global.u8 [%r{{[0-9]+}}], %rs{{[0-9]+}}; ; PTX64: mov.u16 %rs{{[0-9]+}}, 0; -; PTX64-NEXT: st.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}; +; PTX64-NEXT: st.global.u8 [%rd{{[0-9]+}}], %rs{{[0-9]+}}; store i1 false, i1* %a ret void } define ptx_kernel void @t2(i1* %a, i8* %b) { -; PTX32: ld.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}] +; PTX32: ld.global.u8 %rs{{[0-9]+}}, [%r{{[0-9]+}}] ; PTX32: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1; ; PTX32: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1; -; PTX64: ld.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] +; PTX64: ld.global.u8 %rs{{[0-9]+}}, [%rd{{[0-9]+}}] ; PTX64: and.b16 %rs{{[0-9]+}}, %rs{{[0-9]+}}, 1; ; PTX64: setp.eq.b16 %p{{[0-9]+}}, %rs{{[0-9]+}}, 1; diff --git a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll index ed021346c0f..c17c71e01d3 100644 --- a/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/surf-read-cuda.ll @@ -18,8 +18,8 @@ define void @foo(i64 %img, float* %red, i32 %idx) { ; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]] ; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]] %ret = sitofp i32 %val to float -; SM20: st.f32 [%r{{[0-9]+}}], %f[[REDF]] -; SM30: st.f32 [%r{{[0-9]+}}], %f[[REDF]] +; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]] +; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]] store float %ret, float* %red ret void } @@ -37,8 +37,8 @@ define void @bar(float* %red, i32 %idx) { ; SM20: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]] ; SM30: cvt.rn.f32.s32 %f[[REDF:[0-9]+]], %r[[RED]] %ret = sitofp i32 %val to float -; SM20: st.f32 [%r{{[0-9]+}}], %f[[REDF]] -; SM30: st.f32 [%r{{[0-9]+}}], %f[[REDF]] +; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]] +; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[REDF]] store float %ret, float* %red ret void } diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll index c5b5600de87..d5f7c1667f1 100644 --- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll @@ -16,8 +16,8 @@ define void @foo(i64 %img, float* %red, i32 %idx) { ; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXREG]], {%r{{[0-9]+}}}] %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %img, i32 %idx) %ret = extractvalue { float, float, float, float } %val, 0 -; SM20: st.f32 [%r{{[0-9]+}}], %f[[RED]] -; SM30: st.f32 [%r{{[0-9]+}}], %f[[RED]] +; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[RED]] +; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[RED]] store float %ret, float* %red ret void } @@ -34,8 +34,8 @@ define void @bar(float* %red, i32 %idx) { ; SM30: tex.1d.v4.f32.s32 {%f[[RED:[0-9]+]], %f[[GREEN:[0-9]+]], %f[[BLUE:[0-9]+]], %f[[ALPHA:[0-9]+]]}, [%rd[[TEXHANDLE]], {%r{{[0-9]+}}}] %val = tail call { float, float, float, float } @llvm.nvvm.tex.unified.1d.v4f32.s32(i64 %texHandle, i32 %idx) %ret = extractvalue { float, float, float, float } %val, 0 -; SM20: st.f32 [%r{{[0-9]+}}], %f[[RED]] -; SM30: st.f32 [%r{{[0-9]+}}], %f[[RED]] +; SM20: st.global.f32 [%r{{[0-9]+}}], %f[[RED]] +; SM30: st.global.f32 [%r{{[0-9]+}}], %f[[RED]] store float %ret, float* %red ret void } |