summaryrefslogtreecommitdiffstats
path: root/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll
blob: 368dc6ab361ea104fdfed4c5891420c6c97e4925 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
; RUN: opt -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
; RUN: opt -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s

; Per this layout string, pointers in the default address space (the one the
; allocas in this file use) are 32 bits wide ("p:32:32"), while addrspace(1)
; pointers such as the %out kernel arguments are 64 bits wide ("p1:64:64").
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"

; ALL-LABEL: @load_unknown_offset_align1_i8(
; ALL: alloca [128 x i8], align 1
; UNALIGNED: load <2 x i8>, <2 x i8>* %{{[0-9]+}}, align 1{{$}}

; ALIGNED: load i8, i8* %ptr0, align 1{{$}}
; ALIGNED: load i8, i8* %ptr1, align 1{{$}}
; Loads two adjacent i8 elements from an align-1 alloca at a runtime index,
; adds them and stores the sum to %out. The checks above expect one <2 x i8>
; load when unaligned access is enabled, and both scalar loads to remain
; (still align 1) when it is disabled.
define amdgpu_kernel void @load_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 {
  %alloca = alloca [128 x i8], align 1
  ; The element index is the %offset kernel argument, so it is not a constant.
  %ptr0 = getelementptr inbounds [128 x i8], [128 x i8]* %alloca, i32 0, i32 %offset
  %val0 = load i8, i8* %ptr0, align 1
  ; %ptr1 addresses the element immediately after %ptr0.
  %ptr1 = getelementptr inbounds i8, i8* %ptr0, i32 1
  %val1 = load i8, i8* %ptr1, align 1
  %add = add i8 %val0, %val1
  store i8 %add, i8 addrspace(1)* %out
  ret void
}

; ALL-LABEL: @load_unknown_offset_align1_i16(
; ALL: alloca [128 x i16], align 1{{$}}
; UNALIGNED: load <2 x i16>, <2 x i16>* %{{[0-9]+}}, align 1{{$}}

; ALIGNED: load i16, i16* %ptr0, align 1{{$}}
; ALIGNED: load i16, i16* %ptr1, align 1{{$}}
; Loads two adjacent i16 elements from an align-1 alloca at a runtime index,
; adds them and stores the sum to %out. The checks above expect one <2 x i16>
; load when unaligned access is enabled, and both scalar loads to remain
; (still align 1) when it is disabled.
define amdgpu_kernel void @load_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 {
  ; The alloca is deliberately under-aligned (align 1) for its i16 elements.
  %alloca = alloca [128 x i16], align 1
  ; The element index is the %offset kernel argument, so it is not a constant.
  %ptr0 = getelementptr inbounds [128 x i16], [128 x i16]* %alloca, i32 0, i32 %offset
  %val0 = load i16, i16* %ptr0, align 1
  ; %ptr1 addresses the i16 element immediately after %ptr0.
  %ptr1 = getelementptr inbounds i16, i16* %ptr0, i32 1
  %val1 = load i16, i16* %ptr1, align 1
  %add = add i16 %val0, %val1
  store i16 %add, i16 addrspace(1)* %out
  ret void
}

; FIXME: Although the offset is unknown here, we know it is a multiple
; of the element size, so the loads should still be align 4.

; ALL-LABEL: @load_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 1
; UNALIGNED: load <2 x i32>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}

; ALIGNED: load i32, i32* %ptr0, align 1
; ALIGNED: load i32, i32* %ptr1, align 1
; Loads two adjacent i32 elements from an align-1 alloca at a runtime index,
; adds them and stores the sum to %out. The checks above expect one <2 x i32>
; load (align 1) when unaligned access is enabled, and both scalar align-1
; loads to remain when it is disabled.
define amdgpu_kernel void @load_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
  ; The alloca is deliberately under-aligned (align 1) for its i32 elements.
  %alloca = alloca [128 x i32], align 1
  ; The element index is the %offset kernel argument, so it is not a constant.
  %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
  %val0 = load i32, i32* %ptr0, align 1
  ; %ptr1 addresses the i32 element immediately after %ptr0.
  %ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
  %val1 = load i32, i32* %ptr1, align 1
  %add = add i32 %val0, %val1
  store i32 %add, i32 addrspace(1)* %out
  ret void
}

; FIXME: Should always increase alignment of the load
; Make sure alloca alignment isn't decreased
; ALL-LABEL: @load_alloca16_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 16

; UNALIGNED: load <2 x i32>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}

; FIXME: Should change alignment
; ALIGNED: load i32
; ALIGNED: load i32
; Same access pattern as the align-1 i32 load test, but the alloca is already
; align 16 while the loads are only marked align 1. The checks above require
; the alloca to stay align 16; the load alignment in the run with unaligned
; access disabled is intentionally left unchecked (see the FIXME above).
define amdgpu_kernel void @load_alloca16_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
  ; Over-aligned relative to the align-1 loads that read from it.
  %alloca = alloca [128 x i32], align 16
  ; The element index is the %offset kernel argument, so it is not a constant.
  %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
  %val0 = load i32, i32* %ptr0, align 1
  ; %ptr1 addresses the i32 element immediately after %ptr0.
  %ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
  %val1 = load i32, i32* %ptr1, align 1
  %add = add i32 %val0, %val1
  store i32 %add, i32 addrspace(1)* %out
  ret void
}

; ALL-LABEL: @store_unknown_offset_align1_i8(
; ALL: alloca [128 x i8], align 1
; UNALIGNED: store <2 x i8> <i8 9, i8 10>, <2 x i8>* %{{[0-9]+}}, align 1{{$}}

; ALIGNED: store i8 9, i8* %ptr0, align 1{{$}}
; ALIGNED: store i8 10, i8* %ptr1, align 1{{$}}
; Stores the constants 9 and 10 to two adjacent i8 elements of an align-1
; alloca at a runtime index. The checks above expect one <2 x i8> store when
; unaligned access is enabled, and both scalar stores to remain (still
; align 1) when it is disabled. %out is not used by this kernel.
define amdgpu_kernel void @store_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 {
  %alloca = alloca [128 x i8], align 1
  ; The element index is the %offset kernel argument, so it is not a constant.
  %ptr0 = getelementptr inbounds [128 x i8], [128 x i8]* %alloca, i32 0, i32 %offset
  store i8 9, i8* %ptr0, align 1
  ; %ptr1 addresses the element immediately after %ptr0.
  %ptr1 = getelementptr inbounds i8, i8* %ptr0, i32 1
  store i8 10, i8* %ptr1, align 1
  ret void
}

; ALL-LABEL: @store_unknown_offset_align1_i16(
; ALL: alloca [128 x i16], align 1
; UNALIGNED: store <2 x i16> <i16 9, i16 10>, <2 x i16>* %{{[0-9]+}}, align 1{{$}}

; ALIGNED: store i16 9, i16* %ptr0, align 1{{$}}
; ALIGNED: store i16 10, i16* %ptr1, align 1{{$}}
; Stores the constants 9 and 10 to two adjacent i16 elements of an align-1
; alloca at a runtime index. The checks above expect one <2 x i16> store when
; unaligned access is enabled, and both scalar stores to remain (still
; align 1) when it is disabled. %out is not used by this kernel.
define amdgpu_kernel void @store_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 {
  ; The alloca is deliberately under-aligned (align 1) for its i16 elements.
  %alloca = alloca [128 x i16], align 1
  ; The element index is the %offset kernel argument, so it is not a constant.
  %ptr0 = getelementptr inbounds [128 x i16], [128 x i16]* %alloca, i32 0, i32 %offset
  store i16 9, i16* %ptr0, align 1
  ; %ptr1 addresses the i16 element immediately after %ptr0.
  %ptr1 = getelementptr inbounds i16, i16* %ptr0, i32 1
  store i16 10, i16* %ptr1, align 1
  ret void
}

; FIXME: Although the offset is unknown here, we know it is a multiple
; of the element size, so it still should be align 4.

; ALL-LABEL: @store_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 1

; UNALIGNED: store <2 x i32> <i32 9, i32 10>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}

; ALIGNED: store i32 9, i32* %ptr0, align 1
; ALIGNED: store i32 10, i32* %ptr1, align 1
; Stores the constants 9 and 10 to two adjacent i32 elements of an align-1
; alloca at a runtime index. The checks above expect one <2 x i32> store
; (align 1) when unaligned access is enabled, and both scalar align-1 stores
; to remain when it is disabled. %out is not used by this kernel.
define amdgpu_kernel void @store_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
  ; The alloca is deliberately under-aligned (align 1) for its i32 elements.
  %alloca = alloca [128 x i32], align 1
  ; The element index is the %offset kernel argument, so it is not a constant.
  %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
  store i32 9, i32* %ptr0, align 1
  ; %ptr1 addresses the i32 element immediately after %ptr0.
  %ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
  store i32 10, i32* %ptr1, align 1
  ret void
}

; Attribute group referenced as #0 by each kernel definition above.
attributes #0 = { nounwind }

OpenPOWER on IntegriCloud