diff options
Diffstat (limited to 'llvm/test/Transforms/LoadStoreVectorizer')
15 files changed, 116 insertions, 116 deletions
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/aa-metadata.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/aa-metadata.ll index e6904ee50bc..4b2dab47a20 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/aa-metadata.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/aa-metadata.ll @@ -15,7 +15,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; NOSCOPE: load float ; NOSCOPE: store float ; NOSCOPE: store float -define void @vectorize_alias_scope(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 { +define amdgpu_kernel void @vectorize_alias_scope(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 { entry: %a.idx.1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1 store float 0.0, float addrspace(1)* %a, align 4, !noalias !0 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll index d8f72a8e1df..368dc6ab361 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll @@ -10,7 +10,7 @@ target triple = "amdgcn--" ; ALIGNED: load i8, i8* %ptr0, align 1{{$}} ; ALIGNED: load i8, i8* %ptr1, align 1{{$}} -define void @load_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 { +define amdgpu_kernel void @load_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 { %alloca = alloca [128 x i8], align 1 %ptr0 = getelementptr inbounds [128 x i8], [128 x i8]* %alloca, i32 0, i32 %offset %val0 = load i8, i8* %ptr0, align 1 @@ -27,7 +27,7 @@ define void @load_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %o ; ALIGNED: load i16, i16* %ptr0, align 1{{$}} ; ALIGNED: load i16, i16* %ptr1, align 1{{$}} -define void @load_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 { +define amdgpu_kernel void @load_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 { %alloca = alloca [128 x i16], align 1 %ptr0 = getelementptr inbounds [128 x i16], [128 x i16]* %alloca, i32 0, i32 %offset %val0 = load i16, i16* %ptr0, align 1 @@ -47,7 +47,7 @@ define void @load_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 ; ALIGNED: load i32, i32* %ptr0, align 1 ; ALIGNED: load i32, i32* %ptr1, align 1 -define void @load_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 { +define amdgpu_kernel void @load_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 { %alloca = alloca [128 x i32], align 1 %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset %val0 = load i32, i32* %ptr0, align 1 @@ -68,7 +68,7 @@ define void @load_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 ; FIXME: Should change alignment ; ALIGNED: load i32 ; ALIGNED: load i32 -define void @load_alloca16_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 { +define amdgpu_kernel void @load_alloca16_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 { %alloca = alloca [128 x i32], align 16 %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset %val0 = load i32, i32* %ptr0, align 1 @@ -85,7 +85,7 @@ define void @load_alloca16_unknown_offset_align1_i32(i32 addrspace(1)* noalias % ; ALIGNED: store i8 9, i8* %ptr0, align 1{{$}} ; ALIGNED: store i8 10, i8* %ptr1, align 1{{$}} -define void @store_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 { +define amdgpu_kernel void @store_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 { %alloca = alloca [128 x i8], align 1 %ptr0 = getelementptr inbounds [128 x i8], [128 x i8]* %alloca, i32 0, i32 %offset store i8 9, i8* %ptr0, align 1 @@ -100,7 +100,7 @@ define void @store_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 % ; ALIGNED: store i16 9, i16* %ptr0, align 1{{$}} ; ALIGNED: store i16 10, i16* %ptr1, align 1{{$}} -define void @store_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 { +define amdgpu_kernel void @store_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 { %alloca = alloca [128 x i16], align 1 %ptr0 = getelementptr inbounds [128 x i16], [128 x i16]* %alloca, i32 0, i32 %offset store i16 9, i16* %ptr0, align 1 @@ -119,7 +119,7 @@ define void @store_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 ; ALIGNED: store i32 9, i32* %ptr0, align 1 ; ALIGNED: store i32 10, i32* %ptr1, align 1 -define void @store_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 { +define amdgpu_kernel void @store_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 { %alloca = alloca [128 x i32], align 1 %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset store i32 9, i32* %ptr0, align 1 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/extended-index.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/extended-index.ll index 25abb98c6eb..8a75b8743fa 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/extended-index.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/extended-index.ll @@ -8,7 +8,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 ; CHECK: sext i32 %id.x to i64 ; CHECK: load <2 x float> ; CHECK: store <2 x float> zeroinitializer -define void @basic_merge_sext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 { +define amdgpu_kernel void @basic_merge_sext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 { entry: %id.x = call i32 @llvm.amdgcn.workitem.id.x() %sext.id.x = sext i32 %id.x to i64 @@ -32,7 +32,7 @@ entry: ; CHECK: zext i32 %id.x to i64 ; CHECK: load <2 x float> ; CHECK: store <2 x float> -define void @basic_merge_zext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 { +define amdgpu_kernel void @basic_merge_zext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 { entry: %id.x = call i32 @llvm.amdgcn.workitem.id.x() %zext.id.x = zext i32 %id.x to i64 @@ -54,7 +54,7 @@ entry: ; CHECK-LABEL: @merge_op_zext_index( ; CHECK: load <2 x float> ; CHECK: store <2 x float> -define void @merge_op_zext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 { +define amdgpu_kernel void @merge_op_zext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 { entry: %id.x = call i32 @llvm.amdgcn.workitem.id.x() %shl = shl i32 %id.x, 2 @@ -81,7 +81,7 @@ entry: ; CHECK-LABEL: @merge_op_sext_index( ; CHECK: load <2 x float> ; CHECK: store <2 x float> -define void @merge_op_sext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 { +define amdgpu_kernel void @merge_op_sext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 { entry: %id.x = call i32 @llvm.amdgcn.workitem.id.x() %shl = shl i32 %id.x, 2 @@ -112,7 +112,7 @@ entry: ; CHECK: loop: ; CHECK: load <2 x i32> ; CHECK: store <2 x i32> -define void @zext_trunc_phi_1(i32 addrspace(1)* nocapture noalias %a, i32 addrspace(1)* nocapture noalias %b, i32 addrspace(1)* nocapture readonly noalias %c, i32 %n, i64 %arst, i64 %aoeu) #0 { +define amdgpu_kernel void @zext_trunc_phi_1(i32 addrspace(1)* nocapture noalias %a, i32 addrspace(1)* nocapture noalias %b, i32 addrspace(1)* nocapture readonly noalias %c, i32 %n, i64 %arst, i64 %aoeu) #0 { entry: %cmp0 = icmp eq i32 %n, 0 br i1 %cmp0, label %exit, label %loop diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll index 2b2f9cbcf50..6182c09abcf 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll @@ -11,7 +11,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; CHECK: load <2 x float> ; CHECK: %w = add i32 %y, 9 ; CHECK: %foo = add i32 %z, %w -define void @insert_load_point(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c, i64 %idx, i32 %x, i32 %y) #0 { +define amdgpu_kernel void @insert_load_point(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c, i64 %idx, i32 %x, i32 %y) #0 { entry: %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %idx @@ -38,7 +38,7 @@ entry: ; CHECK: %w = add i32 %y, 9 ; CHECK: store <2 x float> ; CHECK: %foo = add i32 %z, %w -define void @insert_store_point(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c, i64 %idx, i32 %x, i32 %y) #0 { +define amdgpu_kernel void @insert_store_point(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c, i64 %idx, i32 %x, i32 %y) #0 { entry: %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %idx diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/interleaved-mayalias-store.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/interleaved-mayalias-store.ll index 4d6240a9aa9..3f6d7ee7dca 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/interleaved-mayalias-store.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/interleaved-mayalias-store.ll @@ -8,7 +8,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; CHECK: store double 0.000000e+00, double addrspace(1)* %a, ; CHECK: load double ; CHECK: store double 0.000000e+00, double addrspace(1)* %a.idx.1 -define void @interleave(double addrspace(1)* nocapture %a, double addrspace(1)* nocapture %b, double addrspace(1)* nocapture readonly %c) #0 { +define amdgpu_kernel void @interleave(double addrspace(1)* nocapture %a, double addrspace(1)* nocapture %b, double addrspace(1)* nocapture readonly %c) #0 { entry: %a.idx.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1 %c.idx.1 = getelementptr inbounds double, double addrspace(1)* %c, i64 1 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll index c85be874376..0fcdc7b9083 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll @@ -17,7 +17,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; ELT8-UNALIGNED: store <2 x i32> ; ELT16-UNALIGNED: store <4 x i32> -define void @merge_private_store_4_vector_elts_loads_v4i32(i32* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32(i32* %out) #0 { %out.gep.1 = getelementptr i32, i32* %out, i32 1 %out.gep.2 = getelementptr i32, i32* %out, i32 2 %out.gep.3 = getelementptr i32, i32* %out, i32 3 @@ -44,7 +44,7 @@ define void @merge_private_store_4_vector_elts_loads_v4i32(i32* %out) #0 { ; ELT4-UNALIGNED: store i32 ; ELT4-UNALIGNED: store i32 ; ELT4-UNALIGNED: store i32 -define void @merge_private_store_4_vector_elts_loads_v4i32_align1(i32* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align1(i32* %out) #0 { %out.gep.1 = getelementptr i32, i32* %out, i32 1 %out.gep.2 = getelementptr i32, i32* %out, i32 2 %out.gep.3 = getelementptr i32, i32* %out, i32 3 @@ -71,7 +71,7 @@ define void @merge_private_store_4_vector_elts_loads_v4i32_align1(i32* %out) #0 ; ELT4-UNALIGNED: store i32 ; ELT4-UNALIGNED: store i32 ; ELT4-UNALIGNED: store i32 -define void @merge_private_store_4_vector_elts_loads_v4i32_align2(i32* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align2(i32* %out) #0 { %out.gep.1 = getelementptr i32, i32* %out, i32 1 %out.gep.2 = getelementptr i32, i32* %out, i32 2 %out.gep.3 = getelementptr i32, i32* %out, i32 3 @@ -85,7 +85,7 @@ define void @merge_private_store_4_vector_elts_loads_v4i32_align2(i32* %out) #0 ; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i8( ; ALL: store <4 x i8> -define void @merge_private_store_4_vector_elts_loads_v4i8(i8* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8(i8* %out) #0 { %out.gep.1 = getelementptr i8, i8* %out, i32 1 %out.gep.2 = getelementptr i8, i8* %out, i32 2 %out.gep.3 = getelementptr i8, i8* %out, i32 3 @@ -104,7 +104,7 @@ define void @merge_private_store_4_vector_elts_loads_v4i8(i8* %out) #0 { ; ALIGNED: store i8 ; UNALIGNED: store <4 x i8> <i8 9, i8 1, i8 23, i8 19>, <4 x i8>* %1, align 1 -define void @merge_private_store_4_vector_elts_loads_v4i8_align1(i8* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8_align1(i8* %out) #0 { %out.gep.1 = getelementptr i8, i8* %out, i32 1 %out.gep.2 = getelementptr i8, i8* %out, i32 2 %out.gep.3 = getelementptr i8, i8* %out, i32 3 @@ -118,7 +118,7 @@ define void @merge_private_store_4_vector_elts_loads_v4i8_align1(i8* %out) #0 { ; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v2i16( ; ALL: store <2 x i16> -define void @merge_private_store_4_vector_elts_loads_v2i16(i16* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16(i16* %out) #0 { %out.gep.1 = getelementptr i16, i16* %out, i32 1 store i16 9, i16* %out, align 4 @@ -131,7 +131,7 @@ define void @merge_private_store_4_vector_elts_loads_v2i16(i16* %out) #0 { ; ALIGNED: store i16 ; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 2 -define void @merge_private_store_4_vector_elts_loads_v2i16_align2(i16* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align2(i16* %out) #0 { %out.gep.1 = getelementptr i16, i16* %out, i32 1 store i16 9, i16* %out, align 2 @@ -144,7 +144,7 @@ define void @merge_private_store_4_vector_elts_loads_v2i16_align2(i16* %out) #0 ; ALIGNED: store i16 ; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 1 -define void @merge_private_store_4_vector_elts_loads_v2i16_align1(i16* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align1(i16* %out) #0 { %out.gep.1 = getelementptr i16, i16* %out, i32 1 store i16 9, i16* %out, align 1 @@ -154,7 +154,7 @@ define void @merge_private_store_4_vector_elts_loads_v2i16_align1(i16* %out) #0 ; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v2i16_align8( ; ALL: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 8 -define void @merge_private_store_4_vector_elts_loads_v2i16_align8(i16* %out) #0 { +define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align8(i16* %out) #0 { %out.gep.1 = getelementptr i16, i16* %out, i32 1 store i16 9, i16* %out, align 8 @@ -179,7 +179,7 @@ define void @merge_private_store_4_vector_elts_loads_v2i16_align8(i16* %out) #0 ; ELT16-ALIGNED: store i32 ; ELT16-UNALIGNED: store <3 x i32> -define void @merge_private_store_3_vector_elts_loads_v4i32(i32* %out) #0 { +define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32(i32* %out) #0 { %out.gep.1 = getelementptr i32, i32* %out, i32 1 %out.gep.2 = getelementptr i32, i32* %out, i32 2 @@ -202,7 +202,7 @@ define void @merge_private_store_3_vector_elts_loads_v4i32(i32* %out) #0 { ; ELT8-UNALIGNED: store i32 ; ELT16-UNALIGNED: store <3 x i32> -define void @merge_private_store_3_vector_elts_loads_v4i32_align1(i32* %out) #0 { +define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32_align1(i32* %out) #0 { %out.gep.1 = getelementptr i32, i32* %out, i32 1 %out.gep.2 = getelementptr i32, i32* %out, i32 2 @@ -218,7 +218,7 @@ define void @merge_private_store_3_vector_elts_loads_v4i32_align1(i32* %out) #0 ; ALIGNED: store i8 ; UNALIGNED: store <3 x i8> -define void @merge_private_store_3_vector_elts_loads_v4i8_align1(i8* %out) #0 { +define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i8_align1(i8* %out) #0 { %out.gep.1 = getelementptr i8, i8* %out, i8 1 %out.gep.2 = getelementptr i8, i8* %out, i8 2 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll index d32387fa2c0..dbb7068eeae 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll @@ -10,7 +10,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; CHECK-LABEL: @merge_global_store_2_constants_i8( ; CHECK: store <2 x i8> <i8 -56, i8 123>, <2 x i8> addrspace(1)* %{{[0-9]+}}, align 2 -define void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1 store i8 123, i8 addrspace(1)* %out.gep.1 @@ -20,7 +20,7 @@ define void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_2_constants_i8_natural_align ; CHECK: store <2 x i8> -define void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1 store i8 123, i8 addrspace(1)* %out.gep.1 @@ -30,7 +30,7 @@ define void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %o ; CHECK-LABEL: @merge_global_store_2_constants_i16 ; CHECK: store <2 x i16> <i16 456, i16 123>, <2 x i16> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1 store i16 123, i16 addrspace(1)* %out.gep.1 @@ -40,7 +40,7 @@ define void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_2_constants_0_i16 ; CHECK: store <2 x i16> zeroinitializer, <2 x i16> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1 store i16 0, i16 addrspace(1)* %out.gep.1 @@ -50,7 +50,7 @@ define void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_2_constants_i16_natural_align ; CHECK: store <2 x i16> -define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1 store i16 123, i16 addrspace(1)* %out.gep.1 @@ -60,7 +60,7 @@ define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* ; CHECK-LABEL: @merge_global_store_2_constants_half_natural_align ; CHECK: store <2 x half> -define void @merge_global_store_2_constants_half_natural_align(half addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_half_natural_align(half addrspace(1)* %out) #0 { %out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1 store half 2.0, half addrspace(1)* %out.gep.1 @@ -70,7 +70,7 @@ define void @merge_global_store_2_constants_half_natural_align(half addrspace(1) ; CHECK-LABEL: @merge_global_store_2_constants_i32 ; CHECK: store <2 x i32> <i32 456, i32 123>, <2 x i32> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 store i32 123, i32 addrspace(1)* %out.gep.1 @@ -80,7 +80,7 @@ define void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_2_constants_i32_f32 ; CHECK: store <2 x i32> <i32 456, i32 1065353216>, <2 x i32> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.1.bc = bitcast i32 addrspace(1)* %out.gep.1 to float addrspace(1)* store float 1.0, float addrspace(1)* %out.gep.1.bc @@ -90,7 +90,7 @@ define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_2_constants_f32_i32 ; CHECK store <2 x float> <float 4.000000e+00, float 0x370EC00000000000>, <2 x float> addrspace(1)* %{{[0-9]+$}} -define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)* store i32 123, i32 addrspace(1)* %out.gep.1.bc @@ -100,7 +100,7 @@ define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 ; CHECK-LABEL: @merge_global_store_4_constants_i32 ; CHECK: store <4 x i32> <i32 1234, i32 123, i32 456, i32 333>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 @@ -114,7 +114,7 @@ define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_4_constants_f32_order ; CHECK: store <4 x float> <float 8.000000e+00, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>, <4 x float> addrspace(1)* %{{[0-9]+}} -define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 @@ -129,7 +129,7 @@ define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) ; First store is out of order. ; CHECK-LABEL: @merge_global_store_4_constants_f32 ; CHECK: store <4 x float> <float 8.000000e+00, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>, <4 x float> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 @@ -143,7 +143,7 @@ define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_4_constants_mixed_i32_f32 ; CHECK: store <4 x i32> <i32 1090519040, i32 11, i32 1073741824, i32 17>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 @@ -160,7 +160,7 @@ define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %o ; CHECK-LABEL: @merge_global_store_3_constants_i32 ; CHECK: store <3 x i32> <i32 1234, i32 123, i32 456>, <3 x i32> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 @@ -172,7 +172,7 @@ define void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_2_constants_i64 ; CHECK: store <2 x i64> <i64 456, i64 123>, <2 x i64> addrspace(1)* %{{[0-9]+}}, align 8 -define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1 store i64 123, i64 addrspace(1)* %out.gep.1 @@ -183,7 +183,7 @@ define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_global_store_4_constants_i64 ; CHECK: store <2 x i64> <i64 456, i64 333>, <2 x i64> addrspace(1)* %{{[0-9]+}}, align 8 ; CHECK: store <2 x i64> <i64 1234, i64 123>, <2 x i64> addrspace(1)* %{{[0-9]+}}, align 8 -define void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1 %out.gep.2 = getelementptr i64, i64 addrspace(1)* %out, i64 2 %out.gep.3 = getelementptr i64, i64 addrspace(1)* %out, i64 3 @@ -202,7 +202,7 @@ define void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 { ; CHECK: [[INSERT0:%[^ ]+]] = insertelement <2 x i32> undef, i32 [[ELT0]], i32 0 ; CHECK: [[INSERT1:%[^ ]+]] = insertelement <2 x i32> [[INSERT0]], i32 [[ELT1]], i32 1 ; CHECK: store <2 x i32> [[INSERT1]] -define void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1 @@ -220,7 +220,7 @@ define void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 ; CHECK: insertelement ; CHECK: insertelement ; CHECK: store <2 x i32> -define void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 2 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 3 @@ -241,7 +241,7 @@ define void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace( ; CHECK: [[INSERT0:%[^ ]+]] = insertelement <2 x i32> undef, i32 [[ELT1]], i32 0 ; CHECK: [[INSERT1:%[^ ]+]] = insertelement <2 x i32> [[INSERT0]], i32 [[ELT0]], i32 1 ; CHECK: store <2 x i32> [[INSERT1]] -define void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1 @@ -256,7 +256,7 @@ define void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* % ; CHECK-LABEL: @merge_global_store_4_adjacent_loads_i32 ; CHECK: load <4 x i32> ; CHECK: store <4 x i32> -define void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 @@ -279,7 +279,7 @@ define void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 ; CHECK-LABEL: @merge_global_store_3_adjacent_loads_i32 ; CHECK: load <3 x i32> ; CHECK: store <3 x i32> -define void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1 @@ -298,7 +298,7 @@ define void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 ; CHECK-LABEL: @merge_global_store_4_adjacent_loads_f32 ; CHECK: load <4 x float> ; CHECK: store <4 x float> -define void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3 @@ -321,7 +321,7 @@ define void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, f ; CHECK-LABEL: @merge_global_store_4_adjacent_loads_i32_nonzero_base ; CHECK: load <4 x i32> ; CHECK: store <4 x i32> -define void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 11 %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 12 %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 13 @@ -346,7 +346,7 @@ define void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace( ; CHECK-LABEL: @merge_global_store_4_adjacent_loads_inverse_i32 ; CHECK: load <4 x i32> ; CHECK: store <4 x i32> -define void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 @@ -373,7 +373,7 @@ define void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* % ; CHECK-LABEL: @merge_global_store_4_adjacent_loads_shuffle_i32 ; CHECK: load <4 x i32> ; CHECK: store <4 x i32> -define void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 @@ -408,7 +408,7 @@ define void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* % ; CHECK: insertelement <4 x i8> ; CHECK: insertelement <4 x i8> ; CHECK: store <4 x i8> -define void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1 %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2 %out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3 @@ -431,7 +431,7 @@ define void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 ad ; CHECK-LABEL: @merge_global_store_4_adjacent_loads_i8_natural_align ; CHECK: load <4 x i8> ; CHECK: store <4 x i8> -define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1 %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2 %out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3 @@ -454,7 +454,7 @@ define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1 ; CHECK-LABEL: @merge_global_store_4_vector_elts_loads_v4i32 ; CHECK: load <4 x i32> ; CHECK: store <4 x i32> -define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { +define amdgpu_kernel void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 @@ -474,7 +474,7 @@ define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out ; CHECK-LABEL: @merge_local_store_2_constants_i8 ; CHECK: store <2 x i8> <i8 -56, i8 123>, <2 x i8> addrspace(3)* %{{[0-9]+}}, align 2 -define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 { +define amdgpu_kernel void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i8, i8 addrspace(3)* %out, i32 1 store i8 123, i8 addrspace(3)* %out.gep.1 @@ -484,7 +484,7 @@ define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 { ; CHECK-LABEL: @merge_local_store_2_constants_i32 ; CHECK: store <2 x i32> <i32 456, i32 123>, <2 x i32> addrspace(3)* %{{[0-9]+}}, align 4 -define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 { +define amdgpu_kernel void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1 store i32 123, i32 addrspace(3)* %out.gep.1 @@ -495,7 +495,7 @@ define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 { ; CHECK-LABEL: @merge_local_store_2_constants_i32_align_2 ; CHECK: store i32 ; CHECK: store i32 -define void @merge_local_store_2_constants_i32_align_2(i32 addrspace(3)* %out) #0 { +define amdgpu_kernel void @merge_local_store_2_constants_i32_align_2(i32 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1 store i32 123, i32 addrspace(3)* %out.gep.1, align 2 @@ -506,7 +506,7 @@ define void @merge_local_store_2_constants_i32_align_2(i32 addrspace(3)* %out) # ; CHECK-LABEL: @merge_local_store_4_constants_i32 ; CHECK: store <2 x i32> <i32 456, i32 333>, <2 x i32> addrspace(3)* ; CHECK: store <2 x i32> <i32 1234, i32 123>, <2 x i32> addrspace(3)* -define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 { +define amdgpu_kernel void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(3)* %out, i32 3 @@ -521,7 +521,7 @@ define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 { ; CHECK-LABEL: @merge_global_store_5_constants_i32 ; CHECK: store <4 x i32> <i32 9, i32 12, i32 16, i32 -12>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4 ; CHECK: store i32 -define void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) { +define amdgpu_kernel void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) { store i32 9, i32 addrspace(1)* %out, align 4 %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 store i32 12, i32 addrspace(1)* %idx1, align 4 @@ -537,7 +537,7 @@ define void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) { ; CHECK-LABEL: @merge_global_store_6_constants_i32 ; CHECK: store <4 x i32> <i32 13, i32 15, i32 62, i32 63>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4 ; CHECK: store <2 x i32> <i32 11, i32 123>, <2 x i32> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) { +define amdgpu_kernel void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) { store i32 13, i32 addrspace(1)* %out, align 4 %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 store i32 15, i32 addrspace(1)* %idx1, align 4 @@ -555,7 +555,7 @@ define void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) { ; CHECK-LABEL: @merge_global_store_7_constants_i32 ; CHECK: store <4 x i32> <i32 34, i32 999, i32 65, i32 33>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4 ; CHECK: store <3 x i32> <i32 98, i32 91, i32 212>, <3 x i32> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) { +define amdgpu_kernel void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) { store i32 34, i32 addrspace(1)* %out, align 4 %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 store i32 999, i32 addrspace(1)* %idx1, align 4 @@ -575,7 +575,7 @@ define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) { ; CHECK-LABEL: @merge_global_store_8_constants_i32 ; CHECK: store <4 x i32> <i32 34, i32 999, i32 65, i32 33>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4 ; CHECK: store <4 x i32> <i32 98, i32 91, i32 212, i32 999>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4 -define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { +define amdgpu_kernel void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { store i32 34, i32 addrspace(1)* %out, align 4 %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1 store i32 999, i32 addrspace(1)* %idx1, align 4 @@ -597,7 +597,7 @@ define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { ; CHECK-LABEL: @copy_v3i32_align4 ; CHECK: %vec = load <3 x i32>, <3 x i32> addrspace(1)* %in, align 4 ; CHECK: store <3 x i32> %vec, <3 x i32> addrspace(1)* %out -define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 { +define amdgpu_kernel void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 { %vec = load <3 x i32>, <3 x i32> addrspace(1)* %in, align 4 store <3 x i32> %vec, <3 x i32> addrspace(1)* %out ret void @@ -606,7 +606,7 @@ define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> a ; CHECK-LABEL: @copy_v3i64_align4 ; CHECK: %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4 ; CHECK: store <3 x i64> %vec, <3 x i64> addrspace(1)* %out -define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 { +define amdgpu_kernel void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 { %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4 store <3 x i64> %vec, <3 x i64> addrspace(1)* %out ret void @@ -615,7 +615,7 @@ define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> a ; CHECK-LABEL: @copy_v3f32_align4 ; CHECK: %vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4 ; CHECK: store <3 x float> -define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 { +define amdgpu_kernel void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 { %vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4 %fadd = fadd <3 x float> %vec, <float 1.0, float 2.0, float 4.0> store <3 x float> %fadd, <3 x float> addrspace(1)* %out @@ -625,7 +625,7 @@ define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x floa ; CHECK-LABEL: @copy_v3f64_align4 ; CHECK: %vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4 ; CHECK: store <3 x double> %fadd, <3 x double> addrspace(1)* %out -define void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 { +define amdgpu_kernel void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 { %vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4 %fadd = fadd <3 x double> %vec, <double 1.0, double 2.0, double 4.0> store <3 x double> %fadd, <3 x double> addrspace(1)* %out diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll index 8885d61014f..226147df66a 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll @@ -5,7 +5,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24: ; CHECK-LABEL: @merge_v2i32_v2i32( ; CHECK: load <4 x i32> ; CHECK: store <4 x i32> zeroinitializer -define void @merge_v2i32_v2i32(<2 x i32> addrspace(1)* nocapture %a, <2 x i32> addrspace(1)* nocapture readonly %b) #0 { +define amdgpu_kernel void @merge_v2i32_v2i32(<2 x i32> addrspace(1)* nocapture %a, <2 x i32> addrspace(1)* nocapture readonly %b) #0 { entry: %a.1 = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %a, i64 1 %b.1 = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %b, i64 1 @@ -22,7 +22,7 @@ entry: ; CHECK-LABEL: @merge_v1i32_v1i32( ; CHECK: load <2 x i32> ; CHECK: store <2 x i32> zeroinitializer -define void @merge_v1i32_v1i32(<1 x i32> addrspace(1)* nocapture %a, <1 x i32> addrspace(1)* nocapture readonly %b) #0 { +define amdgpu_kernel void @merge_v1i32_v1i32(<1 x i32> addrspace(1)* nocapture %a, <1 x i32> addrspace(1)* nocapture readonly %b) #0 { entry: %a.1 = getelementptr inbounds <1 x i32>, <1 x i32> addrspace(1)* %a, i64 1 %b.1 = getelementptr inbounds <1 x i32>, <1 x i32> addrspace(1)* %b, i64 1 @@ -41,7 +41,7 @@ entry: ; CHECK: load <3 x i32> ; CHECK: store <3 x i32> zeroinitializer ; CHECK: store <3 x i32> zeroinitializer -define void @no_merge_v3i32_v3i32(<3 x i32> addrspace(1)* nocapture %a, <3 x i32> addrspace(1)* nocapture readonly %b) #0 { +define amdgpu_kernel void @no_merge_v3i32_v3i32(<3 x i32> addrspace(1)* nocapture %a, <3 x i32> addrspace(1)* nocapture readonly %b) #0 { entry: %a.1 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %a, i64 1 %b.1 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %b, i64 1 @@ -58,7 +58,7 @@ entry: ; CHECK-LABEL: @merge_v2i16_v2i16( ; CHECK: load <4 x i16> ; CHECK: store <4 x i16> zeroinitializer -define void @merge_v2i16_v2i16(<2 x i16> addrspace(1)* nocapture %a, <2 x i16> addrspace(1)* nocapture readonly %b) #0 { +define amdgpu_kernel void @merge_v2i16_v2i16(<2 x i16> addrspace(1)* nocapture %a, <2 x i16> addrspace(1)* nocapture readonly %b) #0 { entry: %a.1 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a, i64 1 %b.1 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b, i64 1 @@ -76,7 +76,7 @@ entry: ; CHECK-LABEL: @merge_load_i32_v2i16( ; CHECK: load i32, ; CHECK: load <2 x i16> -define void @merge_load_i32_v2i16(i32 addrspace(1)* nocapture %a) #0 { +define amdgpu_kernel void @merge_load_i32_v2i16(i32 addrspace(1)* nocapture %a) #0 { entry: %a.1 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 1 %a.1.cast = bitcast i32 addrspace(1)* %a.1 to <2 x i16> addrspace(1)* diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/missing-alignment.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/missing-alignment.ll index ba792f78353..f353106607d 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/missing-alignment.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/missing-alignment.ll @@ -7,7 +7,7 @@ ; CHECK-LABEL: @load_keep_base_alignment_missing_align( ; CHECK: load <2 x float>, <2 x float> addrspace(3)* %{{[0-9]+}}, align 4 -define void @load_keep_base_alignment_missing_align(float addrspace(1)* %out) { +define amdgpu_kernel void @load_keep_base_alignment_missing_align(float addrspace(1)* %out) { %ptr0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 11 %val0 = load float, float addrspace(3)* %ptr0 @@ -21,7 +21,7 @@ define void @load_keep_base_alignment_missing_align(float addrspace(1)* %out) { ; CHECK-LABEL: @store_keep_base_alignment_missing_align( ; CHECK: store <2 x float> zeroinitializer, <2 x float> addrspace(3)* %{{[0-9]+}}, align 4 -define void @store_keep_base_alignment_missing_align() { +define amdgpu_kernel void @store_keep_base_alignment_missing_align() { %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 1 %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 2 store float 0.0, float addrspace(3)* %arrayidx0 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll index 88eca363902..8a78f3d7e9b 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/multiple_tails.ll @@ -11,7 +11,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64: ; CHECK: store i32 0 ; CHECK: store i32 0 -define void @no_crash(i32 %arg) { +define amdgpu_kernel void @no_crash(i32 %arg) { %tmp2 = add i32 %arg, 14 %tmp3 = getelementptr [16384 x i32], [16384 x i32] addrspace(3)* @0, i32 0, i32 %tmp2 %tmp4 = add i32 %arg, 15 @@ -37,7 +37,7 @@ define void @no_crash(i32 %arg) { ; CHECK: load i32 ; CHECK: load i32 -define void @interleave_get_longest(i32 %arg) { +define amdgpu_kernel void @interleave_get_longest(i32 %arg) { %a1 = add i32 %arg, 1 %a2 = add i32 %arg, 2 %a3 = add i32 %arg, 3 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/no-implicit-float.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/no-implicit-float.ll index 4a429533df0..818189565b4 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/no-implicit-float.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/no-implicit-float.ll @@ -5,7 +5,7 @@ ; CHECK: store i32 ; CHECK: store i32 ; CHECK: store i32 -define void @no_implicit_float(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @no_implicit_float(i32 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2 %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/optnone.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/optnone.ll index 141e20a1f83..28d29f8e813 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/optnone.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/optnone.ll @@ -3,7 +3,7 @@ ; CHECK-LABEL: @optnone( ; CHECK: store i32 ; CHECK: store i32 -define void @optnone(i32 addrspace(1)* %out) noinline optnone { +define amdgpu_kernel void @optnone(i32 addrspace(1)* %out) noinline optnone { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 store i32 123, i32 addrspace(1)* %out.gep.1 @@ -13,7 +13,7 @@ define void @optnone(i32 addrspace(1)* %out) noinline optnone { ; CHECK-LABEL: @do_opt( ; CHECK: store <2 x i32> -define void @do_opt(i32 addrspace(1)* %out) { +define amdgpu_kernel void @do_opt(i32 addrspace(1)* %out) { %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1 store i32 123, i32 addrspace(1)* %out.gep.1 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/pointer-elements.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/pointer-elements.ll index 202e988ea5f..65200b95d5e 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/pointer-elements.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/pointer-elements.ll @@ -9,7 +9,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1 ; CHECK: inttoptr i64 %{{[^ ]+}} to i8 addrspace(1)* ; CHECK: inttoptr i64 %{{[^ ]+}} to i8 addrspace(1)* ; CHECK: store <2 x i64> zeroinitializer -define void @merge_v2p1i8(i8 addrspace(1)* addrspace(1)* nocapture %a, i8 addrspace(1)* addrspace(1)* nocapture readonly %b) #0 { +define amdgpu_kernel void @merge_v2p1i8(i8 addrspace(1)* addrspace(1)* nocapture %a, i8 addrspace(1)* addrspace(1)* nocapture readonly %b) #0 { entry: %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1 %b.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b, i64 1 @@ -28,7 +28,7 @@ entry: ; CHECK: inttoptr i32 %{{[^ ]+}} to i8 addrspace(3)* ; CHECK: inttoptr i32 %{{[^ ]+}} to i8 addrspace(3)* ; CHECK: store <2 x i32> zeroinitializer -define void @merge_v2p3i8(i8 addrspace(3)* addrspace(3)* nocapture %a, i8 addrspace(3)* addrspace(3)* nocapture readonly %b) #0 { +define amdgpu_kernel void @merge_v2p3i8(i8 addrspace(3)* addrspace(3)* nocapture %a, i8 addrspace(3)* addrspace(3)* nocapture readonly %b) #0 { entry: %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a, i64 1 %b.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b, i64 1 @@ -46,7 +46,7 @@ entry: ; CHECK: load <2 x i64> ; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1 ; CHECK: inttoptr i64 [[ELT1]] to i8 addrspace(1)* -define void @merge_load_i64_ptr64(i64 addrspace(1)* nocapture %a) #0 { +define amdgpu_kernel void @merge_load_i64_ptr64(i64 addrspace(1)* nocapture %a) #0 { entry: %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1 %a.1.cast = bitcast i64 addrspace(1)* %a.1 to i8 addrspace(1)* addrspace(1)* @@ -61,7 +61,7 @@ entry: ; CHECK: load <2 x i64> ; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0 ; CHECK: inttoptr i64 [[ELT0]] to i8 addrspace(1)* -define void @merge_load_ptr64_i64(i64 addrspace(1)* nocapture %a) #0 { +define amdgpu_kernel void @merge_load_ptr64_i64(i64 addrspace(1)* nocapture %a) #0 { entry: %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(1)* addrspace(1)* %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1 @@ -76,7 +76,7 @@ entry: ; CHECK: [[ELT0:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr0 to i64 ; CHECK: insertelement <2 x i64> undef, i64 [[ELT0]], i32 0 ; CHECK: store <2 x i64> -define void @merge_store_ptr64_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, i64 %val1) #0 { +define amdgpu_kernel void @merge_store_ptr64_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, i64 %val1) #0 { entry: %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(1)* addrspace(1)* %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1 @@ -92,7 +92,7 @@ entry: ; CHECK: [[ELT1:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64 ; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1]], i32 1 ; CHECK: store <2 x i64> -define void @merge_store_i64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(1)* %ptr1) #0 { +define amdgpu_kernel void @merge_store_i64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(1)* %ptr1) #0 { entry: %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1 %a.cast = bitcast i8 addrspace(1)* addrspace(1)* %a to i64 addrspace(1)* @@ -107,7 +107,7 @@ entry: ; CHECK: load <2 x i32> ; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i32> %{{[^ ]+}}, i32 1 ; CHECK: inttoptr i32 [[ELT1]] to i8 addrspace(3)* -define void @merge_load_i32_ptr32(i32 addrspace(3)* nocapture %a) #0 { +define amdgpu_kernel void @merge_load_i32_ptr32(i32 addrspace(3)* nocapture %a) #0 { entry: %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1 %a.1.cast = bitcast i32 addrspace(3)* %a.1 to i8 addrspace(3)* addrspace(3)* @@ -122,7 +122,7 @@ entry: ; CHECK: load <2 x i32> ; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i32> %{{[^ ]+}}, i32 0 ; CHECK: inttoptr i32 [[ELT0]] to i8 addrspace(3)* -define void @merge_load_ptr32_i32(i32 addrspace(3)* nocapture %a) #0 { +define amdgpu_kernel void @merge_load_ptr32_i32(i32 addrspace(3)* nocapture %a) #0 { entry: %a.cast = bitcast i32 addrspace(3)* %a to i8 addrspace(3)* addrspace(3)* %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1 @@ -137,7 +137,7 @@ entry: ; CHECK: [[ELT0:%[^ ]+]] = ptrtoint i8 addrspace(3)* %ptr0 to i32 ; CHECK: insertelement <2 x i32> undef, i32 [[ELT0]], i32 0 ; CHECK: store <2 x i32> -define void @merge_store_ptr32_i32(i32 addrspace(3)* nocapture %a, i8 addrspace(3)* %ptr0, i32 %val1) #0 { +define amdgpu_kernel void @merge_store_ptr32_i32(i32 addrspace(3)* nocapture %a, i8 addrspace(3)* %ptr0, i32 %val1) #0 { entry: %a.cast = bitcast i32 addrspace(3)* %a to i8 addrspace(3)* addrspace(3)* %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1 @@ -152,7 +152,7 @@ entry: ; CHECK: [[ELT1:%[^ ]+]] = ptrtoint i8 addrspace(3)* %ptr1 to i32 ; CHECK: insertelement <2 x i32> %{{[^ ]+}}, i32 [[ELT1]], i32 1 ; CHECK: store <2 x i32> -define void @merge_store_i32_ptr32(i8 addrspace(3)* addrspace(3)* nocapture %a, i32 %val0, i8 addrspace(3)* %ptr1) #0 { +define amdgpu_kernel void @merge_store_i32_ptr32(i8 addrspace(3)* addrspace(3)* nocapture %a, i32 %val0, i8 addrspace(3)* %ptr1) #0 { entry: %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a, i32 1 %a.cast = bitcast i8 addrspace(3)* addrspace(3)* %a to i32 addrspace(3)* @@ -166,7 +166,7 @@ entry: ; CHECK-LABEL: @no_merge_store_ptr32_i64( ; CHECK: store i8 addrspace(3)* ; CHECK: store i64 -define void @no_merge_store_ptr32_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(3)* %ptr0, i64 %val1) #0 { +define amdgpu_kernel void @no_merge_store_ptr32_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(3)* %ptr0, i64 %val1) #0 { entry: %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(3)* addrspace(1)* %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1 @@ -181,7 +181,7 @@ entry: ; CHECK-LABEL: @no_merge_store_i64_ptr32( ; CHECK: store i64 ; CHECK: store i8 addrspace(3)* -define void @no_merge_store_i64_ptr32(i8 addrspace(3)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(3)* %ptr1) #0 { +define amdgpu_kernel void @no_merge_store_i64_ptr32(i8 addrspace(3)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(3)* %ptr1) #0 { entry: %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a, i64 1 %a.cast = bitcast i8 addrspace(3)* addrspace(1)* %a to i64 addrspace(1)* @@ -195,7 +195,7 @@ entry: ; CHECK-LABEL: @no_merge_load_i64_ptr32( ; CHECK: load i64, ; CHECK: load i8 addrspace(3)*, -define void @no_merge_load_i64_ptr32(i64 addrspace(1)* nocapture %a) #0 { +define amdgpu_kernel void @no_merge_load_i64_ptr32(i64 addrspace(1)* nocapture %a) #0 { entry: %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1 %a.1.cast = bitcast i64 addrspace(1)* %a.1 to i8 addrspace(3)* addrspace(1)* @@ -209,7 +209,7 @@ entry: ; CHECK-LABEL: @no_merge_load_ptr32_i64( ; CHECK: load i8 addrspace(3)*, ; CHECK: load i64, -define void @no_merge_load_ptr32_i64(i64 addrspace(1)* nocapture %a) #0 { +define amdgpu_kernel void @no_merge_load_ptr32_i64(i64 addrspace(1)* nocapture %a) #0 { entry: %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(3)* addrspace(1)* %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1 @@ -226,7 +226,7 @@ entry: ; CHECK: load <2 x i8 addrspace(1)*> ; CHECK: store <2 x i8 addrspace(1)*> ; CHECK: store <2 x i8 addrspace(1)*> -define void @merge_v2p1i8_v2p1i8(<2 x i8 addrspace(1)*> addrspace(1)* nocapture noalias %a, <2 x i8 addrspace(1)*> addrspace(1)* nocapture readonly noalias %b) #0 { +define amdgpu_kernel void @merge_v2p1i8_v2p1i8(<2 x i8 addrspace(1)*> addrspace(1)* nocapture noalias %a, <2 x i8 addrspace(1)*> addrspace(1)* nocapture readonly noalias %b) #0 { entry: %a.1 = getelementptr inbounds <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %a, i64 1 %b.1 = getelementptr inbounds <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b, i64 1 @@ -245,7 +245,7 @@ entry: ; CHECK: [[ELT0_INT:%[^ ]+]] = inttoptr i64 [[ELT0]] to i8 addrspace(1)* ; CHECK: [[ELT1_INT:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1 ; CHECK: bitcast i64 [[ELT1_INT]] to double -define void @merge_load_ptr64_f64(double addrspace(1)* nocapture %a) #0 { +define amdgpu_kernel void @merge_load_ptr64_f64(double addrspace(1)* nocapture %a) #0 { entry: %a.cast = bitcast double addrspace(1)* %a to i8 addrspace(1)* addrspace(1)* %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1 @@ -262,7 +262,7 @@ entry: ; CHECK: bitcast i64 [[ELT0]] to double ; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1 ; CHECK: inttoptr i64 [[ELT1]] to i8 addrspace(1)* -define void @merge_load_f64_ptr64(double addrspace(1)* nocapture %a) #0 { +define amdgpu_kernel void @merge_load_f64_ptr64(double addrspace(1)* nocapture %a) #0 { entry: %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1 %a.1.cast = bitcast double addrspace(1)* %a.1 to i8 addrspace(1)* addrspace(1)* @@ -279,7 +279,7 @@ entry: ; CHECK: [[ELT1_INT:%[^ ]+]] = bitcast double %val1 to i64 ; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1_INT]], i32 1 ; CHECK: store <2 x i64> -define void @merge_store_ptr64_f64(double addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, double %val1) #0 { +define amdgpu_kernel void @merge_store_ptr64_f64(double addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, double %val1) #0 { entry: %a.cast = bitcast double addrspace(1)* %a to i8 addrspace(1)* addrspace(1)* %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1 @@ -296,7 +296,7 @@ entry: ; CHECK: [[ELT1_INT:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64 ; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1_INT]], i32 1 ; CHECK: store <2 x i64> -define void @merge_store_f64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, double %val0, i8 addrspace(1)* %ptr1) #0 { +define amdgpu_kernel void @merge_store_f64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, double %val0, i8 addrspace(1)* %ptr1) #0 { entry: %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1 %a.cast = bitcast i8 addrspace(1)* addrspace(1)* %a to double addrspace(1)* diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/store_with_aliasing_load.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/store_with_aliasing_load.ll index d70c449e14d..63e688e63fb 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/store_with_aliasing_load.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/store_with_aliasing_load.ll @@ -9,7 +9,7 @@ ; CHECK: store <4 x float> ; Function Attrs: nounwind -define void @store_vectorize_with_alias(i8 addrspace(1)* %a, i8 addrspace(1)* %b) #0 { +define amdgpu_kernel void @store_vectorize_with_alias(i8 addrspace(1)* %a, i8 addrspace(1)* %b) #0 { bb: %tmp = bitcast i8 addrspace(1)* %b to float addrspace(1)* %tmp1 = load float, float addrspace(1)* %tmp, align 4 diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/weird-type-accesses.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/weird-type-accesses.ll index 18f62be27c8..412d2013f6b 100644 --- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/weird-type-accesses.ll +++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/weird-type-accesses.ll @@ -16,7 +16,7 @@ declare void @use_v2i9(<2 x i9>) ; CHECK-LABEL: @merge_store_2_constants_i1( ; CHECK: store i1 ; CHECK: store i1 -define void @merge_store_2_constants_i1(i1 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_store_2_constants_i1(i1 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i1, i1 addrspace(1)* %out, i32 1 store i1 true, i1 addrspace(1)* %out.gep.1 store i1 false, i1 addrspace(1)* %out @@ -26,7 +26,7 @@ define void @merge_store_2_constants_i1(i1 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_store_2_constants_i2( ; CHECK: store i2 1 ; CHECK: store i2 -1 -define void @merge_store_2_constants_i2(i2 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_store_2_constants_i2(i2 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i2, i2 addrspace(1)* %out, i32 1 store i2 1, i2 addrspace(1)* %out.gep.1 store i2 -1, i2 addrspace(1)* %out @@ -36,7 +36,7 @@ define void @merge_store_2_constants_i2(i2 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_different_store_sizes_i1_i8( ; CHECK: store i1 true ; CHECK: store i8 123 -define void @merge_different_store_sizes_i1_i8(i8 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_different_store_sizes_i1_i8(i8 addrspace(1)* %out) #0 { %out.i1 = bitcast i8 addrspace(1)* %out to i1 addrspace(1)* %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1 store i1 true, i1 addrspace(1)* %out.i1 @@ -47,7 +47,7 @@ define void @merge_different_store_sizes_i1_i8(i8 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_different_store_sizes_i8_i1( ; CHECK: store i8 123 ; CHECK: store i1 true -define void @merge_different_store_sizes_i8_i1(i1 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_different_store_sizes_i8_i1(i1 addrspace(1)* %out) #0 { %out.i8 = bitcast i1 addrspace(1)* %out to i8 addrspace(1)* %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out.i8, i32 1 store i8 123, i8 addrspace(1)* %out.gep.1 @@ -58,7 +58,7 @@ define void @merge_different_store_sizes_i8_i1(i1 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_store_2_constant_structs( ; CHECK: store %struct.foo ; CHECK: store %struct.foo -define void @merge_store_2_constant_structs(%struct.foo addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_store_2_constant_structs(%struct.foo addrspace(1)* %out) #0 { %out.gep.1 = getelementptr %struct.foo, %struct.foo addrspace(1)* %out, i32 1 store %struct.foo { i32 12, i8 3 }, %struct.foo addrspace(1)* %out.gep.1 store %struct.foo { i32 92, i8 9 }, %struct.foo addrspace(1)* %out @@ -69,7 +69,7 @@ define void @merge_store_2_constant_structs(%struct.foo addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_store_2_constants_v2i2( ; CHECK: store <2 x i2> ; CHECK: store <2 x i2> -define void @merge_store_2_constants_v2i2(<2 x i2> addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_store_2_constants_v2i2(<2 x i2> addrspace(1)* %out) #0 { %out.gep.1 = getelementptr <2 x i2>, <2 x i2> addrspace(1)* %out, i32 1 store <2 x i2> <i2 1, i2 -1>, <2 x i2> addrspace(1)* %out.gep.1 store <2 x i2> <i2 -1, i2 1>, <2 x i2> addrspace(1)* %out @@ -81,7 +81,7 @@ define void @merge_store_2_constants_v2i2(<2 x i2> addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_store_2_constants_v4i2( ; CHECK: store <4 x i2> ; CHECK: store <4 x i2> -define void @merge_store_2_constants_v4i2(<4 x i2> addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_store_2_constants_v4i2(<4 x i2> addrspace(1)* %out) #0 { %out.gep.1 = getelementptr <4 x i2>, <4 x i2> addrspace(1)* %out, i32 1 store <4 x i2> <i2 1, i2 -1, i2 1, i2 -1>, <4 x i2> addrspace(1)* %out.gep.1 store <4 x i2> <i2 -1, i2 1, i2 -1, i2 1>, <4 x i2> addrspace(1)* %out @@ -91,7 +91,7 @@ define void @merge_store_2_constants_v4i2(<4 x i2> addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_load_2_constants_i1( ; CHECK: load i1 ; CHECK: load i1 -define void @merge_load_2_constants_i1(i1 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_load_2_constants_i1(i1 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i1, i1 addrspace(1)* %out, i32 1 %x = load i1, i1 addrspace(1)* %out.gep.1 %y = load i1, i1 addrspace(1)* %out @@ -103,7 +103,7 @@ define void @merge_load_2_constants_i1(i1 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_load_2_constants_i2( ; CHECK: load i2 ; CHECK: load i2 -define void @merge_load_2_constants_i2(i2 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_load_2_constants_i2(i2 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i2, i2 addrspace(1)* %out, i32 1 %x = load i2, i2 addrspace(1)* %out.gep.1 %y = load i2, i2 addrspace(1)* %out @@ -115,7 +115,7 @@ define void @merge_load_2_constants_i2(i2 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_different_load_sizes_i1_i8( ; CHECK: load i1 ; CHECK: load i8 -define void @merge_different_load_sizes_i1_i8(i8 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_different_load_sizes_i1_i8(i8 addrspace(1)* %out) #0 { %out.i1 = bitcast i8 addrspace(1)* %out to i1 addrspace(1)* %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1 %x = load i1, i1 addrspace(1)* %out.i1 @@ -128,7 +128,7 @@ define void @merge_different_load_sizes_i1_i8(i8 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_different_load_sizes_i8_i1( ; CHECK: load i8 ; CHECK: load i1 -define void @merge_different_load_sizes_i8_i1(i1 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_different_load_sizes_i8_i1(i1 addrspace(1)* %out) #0 { %out.i8 = bitcast i1 addrspace(1)* %out to i8 addrspace(1)* %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out.i8, i32 1 %x = load i8, i8 addrspace(1)* %out.gep.1 @@ -141,7 +141,7 @@ define void @merge_different_load_sizes_i8_i1(i1 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_load_2_constant_structs( ; CHECK: load %struct.foo ; CHECK: load %struct.foo -define void @merge_load_2_constant_structs(%struct.foo addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_load_2_constant_structs(%struct.foo addrspace(1)* %out) #0 { %out.gep.1 = getelementptr %struct.foo, %struct.foo addrspace(1)* %out, i32 1 %x = load %struct.foo, %struct.foo addrspace(1)* %out.gep.1 %y = load %struct.foo, %struct.foo addrspace(1)* %out @@ -153,7 +153,7 @@ define void @merge_load_2_constant_structs(%struct.foo addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_load_2_constants_v2i2( ; CHECK: load <2 x i2> ; CHECK: load <2 x i2> -define void @merge_load_2_constants_v2i2(<2 x i2> addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_load_2_constants_v2i2(<2 x i2> addrspace(1)* %out) #0 { %out.gep.1 = getelementptr <2 x i2>, <2 x i2> addrspace(1)* %out, i32 1 %x = load <2 x i2>, <2 x i2> addrspace(1)* %out.gep.1 %y = load <2 x i2>, <2 x i2> addrspace(1)* %out @@ -165,7 +165,7 @@ define void @merge_load_2_constants_v2i2(<2 x i2> addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_load_2_constants_v4i2( ; CHECK: load <4 x i2> ; CHECK: load <4 x i2> -define void @merge_load_2_constants_v4i2(<4 x i2> addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_load_2_constants_v4i2(<4 x i2> addrspace(1)* %out) #0 { %out.gep.1 = getelementptr <4 x i2>, <4 x i2> addrspace(1)* %out, i32 1 %x = load <4 x i2>, <4 x i2> addrspace(1)* %out.gep.1 %y = load <4 x i2>, <4 x i2> addrspace(1)* %out @@ -177,7 +177,7 @@ define void @merge_load_2_constants_v4i2(<4 x i2> addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_store_2_constants_i9( ; CHECK: store i9 3 ; CHECK: store i9 -5 -define void @merge_store_2_constants_i9(i9 addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_store_2_constants_i9(i9 addrspace(1)* %out) #0 { %out.gep.1 = getelementptr i9, i9 addrspace(1)* %out, i32 1 store i9 3, i9 addrspace(1)* %out.gep.1 store i9 -5, i9 addrspace(1)* %out @@ -187,7 +187,7 @@ define void @merge_store_2_constants_i9(i9 addrspace(1)* %out) #0 { ; CHECK-LABEL: @merge_load_2_constants_v2i9( ; CHECK: load <2 x i9> ; CHECK: load <2 x i9> -define void @merge_load_2_constants_v2i9(<2 x i9> addrspace(1)* %out) #0 { +define amdgpu_kernel void @merge_load_2_constants_v2i9(<2 x i9> addrspace(1)* %out) #0 { %out.gep.1 = getelementptr <2 x i9>, <2 x i9> addrspace(1)* %out, i32 1 %x = load <2 x i9>, <2 x i9> addrspace(1)* %out.gep.1 %y = load <2 x i9>, <2 x i9> addrspace(1)* %out |