summaryrefslogtreecommitdiffstats
path: root/clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
blob: cb8a75882d4d2a9e9769e650e79056a7494c9333 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s

#include "Inputs/cuda.h"

// Coerced struct from `struct S` without all generic pointers lowered into
// global ones.
// CHECK: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* }
// CHECK: %struct.T.coerce = type { [2 x float addrspace(1)*] }

// On the host-side compilation, generic pointer won't be coerced.
// HOST-NOT: %struct.S.coerce
// HOST-NOT: %struct.T.coerce

// CHECK: define amdgpu_kernel void  @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce)
// HOST: define void @_Z7kernel1Pi.stub(i32* %x)
__global__ void kernel1(int *x) {
  x[0]++;
}

// CHECK: define amdgpu_kernel void  @_Z7kernel2Ri(i32 addrspace(1)* dereferenceable(4) %x.coerce)
// HOST: define void @_Z7kernel2Ri.stub(i32* dereferenceable(4) %x)
__global__ void kernel2(int &x) {
  x++;
}

// CHECK: define amdgpu_kernel void  @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
// HOST: define void @_Z7kernel3PU3AS2iPU3AS1i.stub(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
__global__ void kernel3(__attribute__((address_space(2))) int *x,
                        __attribute__((address_space(1))) int *y) {
  y[0] = x[0];
}

// CHECK: define void @_Z4funcPi(i32* %x)
__device__ void func(int *x) {
  x[0]++;
}

struct S {
  int *x;
  float *y;
};
// `by-val` struct will be coerced into a similar struct with all generic
// pointers lowerd into global ones.
// CHECK: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce)
// HOST: define void @_Z7kernel41S.stub(i32* %s.coerce0, float* %s.coerce1)
__global__ void kernel4(struct S s) {
  s.x[0]++;
  s.y[0] += 1.f;
}

// If a pointer to struct is passed, only the pointer itself is coerced into the global one.
// CHECK: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)* %s.coerce)
// HOST: define void @_Z7kernel5P1S.stub(%struct.S* %s)
__global__ void kernel5(struct S *s) {
  s->x[0]++;
  s->y[0] += 1.f;
}

struct T {
  float *x[2];
};
// `by-val` array is also coerced.
// CHECK: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce)
// HOST: define void @_Z7kernel61T.stub(float* %t.coerce0, float* %t.coerce1)
__global__ void kernel6(struct T t) {
  t.x[0][0] += 1.f;
  t.x[1][0] += 2.f;
}
OpenPOWER on IntegriCloud