Diffstat (limited to 'llvm/test/Transforms')
15 files changed, 688 insertions, 691 deletions
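All of the hunks below apply the same mechanical change: the tests are updated for the AMDGPU address-space mapping in which the flat (generic) address space is the default address space 0 rather than addrspace(4), and the private address space is addrspace(5) rather than the default. Global (1), constant (2) and local (3) pointers are untouched, so `T addrspace(4)*` becomes `T*`, private `T*` becomes `T addrspace(5)*`, and the matching intrinsic manglings go from p4 to p0 (e.g. `llvm.memset.p4i8.i64` to `llvm.memset.p0i8.i64`). A minimal before/after sketch of the rewrite, using made-up function names rather than lines taken from the patch:

; old mapping: flat = addrspace(4), private = default (0)
define i64 @flat_to_private_old(i64 addrspace(4)* %p) {
  %q = addrspacecast i64 addrspace(4)* %p to i64*
  %v = load i64, i64* %q
  ret i64 %v
}

; new mapping: flat = default (0), private = addrspace(5)
define i64 @flat_to_private_new(i64* %p) {
  %q = addrspacecast i64* %p to i64 addrspace(5)*
  %v = load i64, i64 addrspace(5)* %q
  ret i64 %v
}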
diff --git a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll index adeba26a6d4..e21392f7fc3 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll @@ -5,7 +5,7 @@ ; CHECK: br ; CHECK-NOT: addrspacecast define i64 @no_sink_local_to_flat(i1 %pred, i64 addrspace(3)* %ptr) { - %ptr_cast = addrspacecast i64 addrspace(3)* %ptr to i64 addrspace(4)* + %ptr_cast = addrspacecast i64 addrspace(3)* %ptr to i64* br i1 %pred, label %l1, label %l2 l1: @@ -13,7 +13,7 @@ l1: ret i64 %v1 l2: - %v2 = load i64, i64 addrspace(4)* %ptr_cast + %v2 = load i64, i64* %ptr_cast ret i64 %v2 } @@ -21,16 +21,16 @@ l2: ; CHECK: addrspacecast ; CHECK: br ; CHECK-NOT: addrspacecast -define i64 @no_sink_private_to_flat(i1 %pred, i64* %ptr) { - %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(4)* +define i64 @no_sink_private_to_flat(i1 %pred, i64 addrspace(5)* %ptr) { + %ptr_cast = addrspacecast i64 addrspace(5)* %ptr to i64* br i1 %pred, label %l1, label %l2 l1: - %v1 = load i64, i64* %ptr + %v1 = load i64, i64 addrspace(5)* %ptr ret i64 %v1 l2: - %v2 = load i64, i64 addrspace(4)* %ptr_cast + %v2 = load i64, i64* %ptr_cast ret i64 %v2 } @@ -40,7 +40,7 @@ l2: ; CHECK: br ; CHECK: addrspacecast define i64 @sink_global_to_flat(i1 %pred, i64 addrspace(1)* %ptr) { - %ptr_cast = addrspacecast i64 addrspace(1)* %ptr to i64 addrspace(4)* + %ptr_cast = addrspacecast i64 addrspace(1)* %ptr to i64* br i1 %pred, label %l1, label %l2 l1: @@ -48,7 +48,7 @@ l1: ret i64 %v1 l2: - %v2 = load i64, i64 addrspace(4)* %ptr_cast + %v2 = load i64, i64* %ptr_cast ret i64 %v2 } @@ -56,12 +56,12 @@ l2: ; CHECK-NOT: addrspacecast ; CHECK: br ; CHECK: addrspacecast -define i64 @sink_flat_to_global(i1 %pred, i64 addrspace(4)* %ptr) { - %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(1)* +define i64 @sink_flat_to_global(i1 %pred, i64* %ptr) { + %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(1)* br i1 %pred, label %l1, label %l2 l1: - %v1 = load i64, i64 addrspace(4)* %ptr + %v1 = load i64, i64* %ptr ret i64 %v1 l2: @@ -73,12 +73,12 @@ l2: ; CHECK-NOT: addrspacecast ; CHECK: br ; CHECK: addrspacecast -define i64 @sink_flat_to_constant(i1 %pred, i64 addrspace(4)* %ptr) { - %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(2)* +define i64 @sink_flat_to_constant(i1 %pred, i64* %ptr) { + %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(2)* br i1 %pred, label %l1, label %l2 l1: - %v1 = load i64, i64 addrspace(4)* %ptr + %v1 = load i64, i64* %ptr ret i64 %v1 l2: @@ -90,12 +90,12 @@ l2: ; CHECK-NOT: addrspacecast ; CHECK: br ; CHECK: addrspacecast -define i64 @sink_flat_to_local(i1 %pred, i64 addrspace(4)* %ptr) { - %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(3)* +define i64 @sink_flat_to_local(i1 %pred, i64* %ptr) { + %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(3)* br i1 %pred, label %l1, label %l2 l1: - %v1 = load i64, i64 addrspace(4)* %ptr + %v1 = load i64, i64* %ptr ret i64 %v1 l2: @@ -107,15 +107,15 @@ l2: ; CHECK-NOT: addrspacecast ; CHECK: br ; CHECK: addrspacecast -define i64 @sink_flat_to_private(i1 %pred, i64 addrspace(4)* %ptr) { - %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64* +define i64 @sink_flat_to_private(i1 %pred, i64* %ptr) { + %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(5)* br i1 %pred, label %l1, label %l2 l1: - %v1 = load i64, i64 addrspace(4)* %ptr + %v1 = load i64, 
i64* %ptr ret i64 %v1 l2: - %v2 = load i64, i64* %ptr_cast + %v2 = load i64, i64 addrspace(5)* %ptr_cast ret i64 %v2 } diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll index 1eab7075403..f70c36ac7f7 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll @@ -3,69 +3,69 @@ ; Trivial optimization of generic addressing ; CHECK-LABEL: @load_global_from_flat( -; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)* +; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)* ; CHECK-NEXT: %tmp1 = load float, float addrspace(1)* %tmp0 ; CHECK-NEXT: ret float %tmp1 -define float @load_global_from_flat(float addrspace(4)* %generic_scalar) #0 { - %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)* +define float @load_global_from_flat(float* %generic_scalar) #0 { + %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)* %tmp1 = load float, float addrspace(1)* %tmp0 ret float %tmp1 } ; CHECK-LABEL: @load_constant_from_flat( -; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(2)* +; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(2)* ; CHECK-NEXT: %tmp1 = load float, float addrspace(2)* %tmp0 ; CHECK-NEXT: ret float %tmp1 -define float @load_constant_from_flat(float addrspace(4)* %generic_scalar) #0 { - %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(2)* +define float @load_constant_from_flat(float* %generic_scalar) #0 { + %tmp0 = addrspacecast float* %generic_scalar to float addrspace(2)* %tmp1 = load float, float addrspace(2)* %tmp0 ret float %tmp1 } ; CHECK-LABEL: @load_group_from_flat( -; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)* +; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)* ; CHECK-NEXT: %tmp1 = load float, float addrspace(3)* %tmp0 ; CHECK-NEXT: ret float %tmp1 -define float @load_group_from_flat(float addrspace(4)* %generic_scalar) #0 { - %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)* +define float @load_group_from_flat(float* %generic_scalar) #0 { + %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)* %tmp1 = load float, float addrspace(3)* %tmp0 ret float %tmp1 } ; CHECK-LABEL: @load_private_from_flat( -; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float* -; CHECK-NEXT: %tmp1 = load float, float* %tmp0 +; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)* +; CHECK-NEXT: %tmp1 = load float, float addrspace(5)* %tmp0 ; CHECK-NEXT: ret float %tmp1 -define float @load_private_from_flat(float addrspace(4)* %generic_scalar) #0 { - %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float* - %tmp1 = load float, float* %tmp0 +define float @load_private_from_flat(float* %generic_scalar) #0 { + %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)* + %tmp1 = load float, float addrspace(5)* %tmp0 ret float %tmp1 } ; CHECK-LABEL: @store_global_from_flat( -; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)* +; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)* ; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* %tmp0 -define amdgpu_kernel void 
@store_global_from_flat(float addrspace(4)* %generic_scalar) #0 { - %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)* +define amdgpu_kernel void @store_global_from_flat(float* %generic_scalar) #0 { + %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)* store float 0.0, float addrspace(1)* %tmp0 ret void } ; CHECK-LABEL: @store_group_from_flat( -; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)* +; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)* ; CHECK-NEXT: store float 0.000000e+00, float addrspace(3)* %tmp0 -define amdgpu_kernel void @store_group_from_flat(float addrspace(4)* %generic_scalar) #0 { - %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)* +define amdgpu_kernel void @store_group_from_flat(float* %generic_scalar) #0 { + %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)* store float 0.0, float addrspace(3)* %tmp0 ret void } ; CHECK-LABEL: @store_private_from_flat( -; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float* -; CHECK-NEXT: store float 0.000000e+00, float* %tmp0 -define amdgpu_kernel void @store_private_from_flat(float addrspace(4)* %generic_scalar) #0 { - %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float* - store float 0.0, float* %tmp0 +; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)* +; CHECK-NEXT: store float 0.000000e+00, float addrspace(5)* %tmp0 +define amdgpu_kernel void @store_private_from_flat(float* %generic_scalar) #0 { + %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)* + store float 0.0, float addrspace(5)* %tmp0 ret void } @@ -75,10 +75,10 @@ define amdgpu_kernel void @store_private_from_flat(float addrspace(4)* %generic_ ; CHECK-NEXT: store i32 %val, i32 addrspace(1)* %output, align 4 ; CHECK-NEXT: ret void define amdgpu_kernel void @load_store_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { - %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)* - %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)* - %val = load i32, i32 addrspace(4)* %tmp0, align 4 - store i32 %val, i32 addrspace(4)* %tmp1, align 4 + %tmp0 = addrspacecast i32 addrspace(1)* %input to i32* + %tmp1 = addrspacecast i32 addrspace(1)* %output to i32* + %val = load i32, i32* %tmp0, align 4 + store i32 %val, i32* %tmp1, align 4 ret void } @@ -88,95 +88,95 @@ define amdgpu_kernel void @load_store_global(i32 addrspace(1)* nocapture %input, ; CHECK-NEXT: store i32 %val, i32 addrspace(3)* %output, align 4 ; CHECK-NEXT: ret void define amdgpu_kernel void @load_store_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 { - %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)* - %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)* - %val = load i32, i32 addrspace(4)* %tmp0, align 4 - store i32 %val, i32 addrspace(4)* %tmp1, align 4 + %tmp0 = addrspacecast i32 addrspace(3)* %input to i32* + %tmp1 = addrspacecast i32 addrspace(3)* %output to i32* + %val = load i32, i32* %tmp0, align 4 + store i32 %val, i32* %tmp1, align 4 ret void } ; Optimized to private load/store. 
; CHECK-LABEL: @load_store_private( -; CHECK-NEXT: %val = load i32, i32* %input, align 4 -; CHECK-NEXT: store i32 %val, i32* %output, align 4 +; CHECK-NEXT: %val = load i32, i32 addrspace(5)* %input, align 4 +; CHECK-NEXT: store i32 %val, i32 addrspace(5)* %output, align 4 ; CHECK-NEXT: ret void -define amdgpu_kernel void @load_store_private(i32* nocapture %input, i32* nocapture %output) #0 { - %tmp0 = addrspacecast i32* %input to i32 addrspace(4)* - %tmp1 = addrspacecast i32* %output to i32 addrspace(4)* - %val = load i32, i32 addrspace(4)* %tmp0, align 4 - store i32 %val, i32 addrspace(4)* %tmp1, align 4 +define amdgpu_kernel void @load_store_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 { + %tmp0 = addrspacecast i32 addrspace(5)* %input to i32* + %tmp1 = addrspacecast i32 addrspace(5)* %output to i32* + %val = load i32, i32* %tmp0, align 4 + store i32 %val, i32* %tmp1, align 4 ret void } ; No optimization. flat load/store. ; CHECK-LABEL: @load_store_flat( -; CHECK-NEXT: %val = load i32, i32 addrspace(4)* %input, align 4 -; CHECK-NEXT: store i32 %val, i32 addrspace(4)* %output, align 4 +; CHECK-NEXT: %val = load i32, i32* %input, align 4 +; CHECK-NEXT: store i32 %val, i32* %output, align 4 ; CHECK-NEXT: ret void -define amdgpu_kernel void @load_store_flat(i32 addrspace(4)* nocapture %input, i32 addrspace(4)* nocapture %output) #0 { - %val = load i32, i32 addrspace(4)* %input, align 4 - store i32 %val, i32 addrspace(4)* %output, align 4 +define amdgpu_kernel void @load_store_flat(i32* nocapture %input, i32* nocapture %output) #0 { + %val = load i32, i32* %input, align 4 + store i32 %val, i32* %output, align 4 ret void } ; CHECK-LABEL: @store_addrspacecast_ptr_value( -; CHECK: %cast = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)* -; CHECK-NEXT: store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %output, align 4 -define amdgpu_kernel void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32 addrspace(4)* addrspace(1)* nocapture %output) #0 { - %cast = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)* - store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %output, align 4 +; CHECK: %cast = addrspacecast i32 addrspace(1)* %input to i32* +; CHECK-NEXT: store i32* %cast, i32* addrspace(1)* %output, align 4 +define amdgpu_kernel void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32* addrspace(1)* nocapture %output) #0 { + %cast = addrspacecast i32 addrspace(1)* %input to i32* + store i32* %cast, i32* addrspace(1)* %output, align 4 ret void } ; CHECK-LABEL: @atomicrmw_add_global_to_flat( ; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(1)* %global.ptr, i32 %y seq_cst define i32 @atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 { - %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)* - %ret = atomicrmw add i32 addrspace(4)* %cast, i32 %y seq_cst + %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32* + %ret = atomicrmw add i32* %cast, i32 %y seq_cst ret i32 %ret } ; CHECK-LABEL: @atomicrmw_add_group_to_flat( ; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(3)* %group.ptr, i32 %y seq_cst define i32 @atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 { - %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)* - %ret = atomicrmw add i32 addrspace(4)* %cast, i32 %y seq_cst + %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32* + %ret = atomicrmw add i32* %cast, i32 %y seq_cst ret i32 %ret } ; 
CHECK-LABEL: @cmpxchg_global_to_flat( ; CHECK: %ret = cmpxchg i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val seq_cst monotonic define { i32, i1 } @cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 { - %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)* - %ret = cmpxchg i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic + %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32* + %ret = cmpxchg i32* %cast, i32 %cmp, i32 %val seq_cst monotonic ret { i32, i1 } %ret } ; CHECK-LABEL: @cmpxchg_group_to_flat( ; CHECK: %ret = cmpxchg i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val seq_cst monotonic define { i32, i1 } @cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 { - %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)* - %ret = cmpxchg i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic + %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32* + %ret = cmpxchg i32* %cast, i32 %cmp, i32 %val seq_cst monotonic ret { i32, i1 } %ret } ; Not pointer operand ; CHECK-LABEL: @cmpxchg_group_to_flat_wrong_operand( -; CHECK: %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32 addrspace(4)* -; CHECK: %ret = cmpxchg i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(4)* %cast.cmp, i32 addrspace(4)* %val seq_cst monotonic -define { i32 addrspace(4)*, i1 } @cmpxchg_group_to_flat_wrong_operand(i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(3)* %cmp.ptr, i32 addrspace(4)* %val) #0 { - %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32 addrspace(4)* - %ret = cmpxchg i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(4)* %cast.cmp, i32 addrspace(4)* %val seq_cst monotonic - ret { i32 addrspace(4)*, i1 } %ret +; CHECK: %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32* +; CHECK: %ret = cmpxchg i32* addrspace(3)* %cas.ptr, i32* %cast.cmp, i32* %val seq_cst monotonic +define { i32*, i1 } @cmpxchg_group_to_flat_wrong_operand(i32* addrspace(3)* %cas.ptr, i32 addrspace(3)* %cmp.ptr, i32* %val) #0 { + %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32* + %ret = cmpxchg i32* addrspace(3)* %cas.ptr, i32* %cast.cmp, i32* %val seq_cst monotonic + ret { i32*, i1 } %ret } ; Null pointer in local addr space ; CHECK-LABEL: @local_nullptr -; CHECK: icmp ne i8 addrspace(3)* %a, addrspacecast (i8* null to i8 addrspace(3)*) +; CHECK: icmp ne i8 addrspace(3)* %a, addrspacecast (i8 addrspace(5)* null to i8 addrspace(3)*) ; CHECK-NOT: i8 addrspace(3)* null define void @local_nullptr(i32 addrspace(1)* nocapture %results, i8 addrspace(3)* %a) { entry: - %tobool = icmp ne i8 addrspace(3)* %a, addrspacecast (i8* null to i8 addrspace(3)*) + %tobool = icmp ne i8 addrspace(3)* %a, addrspacecast (i8 addrspace(5)* null to i8 addrspace(3)*) %conv = zext i1 %tobool to i32 store i32 %conv, i32 addrspace(1)* %results, align 4 ret void diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll index b185ede2657..0a5e7a513e0 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll @@ -3,57 +3,57 @@ ; CHECK-LABEL: @icmp_flat_cmp_self( ; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, %group.ptr.0 define i1 @icmp_flat_cmp_self(i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %cmp = icmp eq i32 addrspace(4)* %cast0, %cast0 + %cast0 = addrspacecast i32 addrspace(3)* 
%group.ptr.0 to i32* + %cmp = icmp eq i32* %cast0, %cast0 ret i1 %cmp } ; CHECK-LABEL: @icmp_flat_flat_from_group( ; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, %group.ptr.1 define i1 @icmp_flat_flat_from_group(i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)* - %cmp = icmp eq i32 addrspace(4)* %cast0, %cast1 + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32* + %cmp = icmp eq i32* %cast0, %cast1 ret i1 %cmp } ; CHECK-LABEL: @icmp_mismatch_flat_from_group_private( -; CHECK: %1 = addrspacecast i32* %private.ptr.0 to i32 addrspace(4)* -; CHECK: %2 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)* -; CHECK: %cmp = icmp eq i32 addrspace(4)* %1, %2 -define i1 @icmp_mismatch_flat_from_group_private(i32* %private.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 { - %cast0 = addrspacecast i32* %private.ptr.0 to i32 addrspace(4)* - %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)* - %cmp = icmp eq i32 addrspace(4)* %cast0, %cast1 +; CHECK: %1 = addrspacecast i32 addrspace(5)* %private.ptr.0 to i32* +; CHECK: %2 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32* +; CHECK: %cmp = icmp eq i32* %1, %2 +define i1 @icmp_mismatch_flat_from_group_private(i32 addrspace(5)* %private.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 { + %cast0 = addrspacecast i32 addrspace(5)* %private.ptr.0 to i32* + %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32* + %cmp = icmp eq i32* %cast0, %cast1 ret i1 %cmp } ; CHECK-LABEL: @icmp_flat_group_flat( -; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* -; CHECK: %cmp = icmp eq i32 addrspace(4)* %1, %flat.ptr.1 -define i1 @icmp_flat_group_flat(i32 addrspace(3)* %group.ptr.0, i32 addrspace(4)* %flat.ptr.1) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %cmp = icmp eq i32 addrspace(4)* %cast0, %flat.ptr.1 +; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* +; CHECK: %cmp = icmp eq i32* %1, %flat.ptr.1 +define i1 @icmp_flat_group_flat(i32 addrspace(3)* %group.ptr.0, i32* %flat.ptr.1) #0 { + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %cmp = icmp eq i32* %cast0, %flat.ptr.1 ret i1 %cmp } ; CHECK-LABEL: @icmp_flat_flat_group( -; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)* -; CHECK: %cmp = icmp eq i32 addrspace(4)* %flat.ptr.0, %1 -define i1 @icmp_flat_flat_group(i32 addrspace(4)* %flat.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 { - %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)* - %cmp = icmp eq i32 addrspace(4)* %flat.ptr.0, %cast1 +; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32* +; CHECK: %cmp = icmp eq i32* %flat.ptr.0, %1 +define i1 @icmp_flat_flat_group(i32* %flat.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 { + %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32* + %cmp = icmp eq i32* %flat.ptr.0, %cast1 ret i1 %cmp } ; Keeping as cmp addrspace(3)* is better ; CHECK-LABEL: @icmp_flat_to_group_cmp( -; CHECK: %cast0 = addrspacecast i32 addrspace(4)* %flat.ptr.0 to i32 addrspace(3)* -; CHECK: %cast1 = addrspacecast i32 addrspace(4)* %flat.ptr.1 to i32 addrspace(3)* +; CHECK: %cast0 = addrspacecast i32* %flat.ptr.0 to i32 addrspace(3)* +; CHECK: %cast1 = addrspacecast i32* %flat.ptr.1 to i32 
addrspace(3)* ; CHECK: %cmp = icmp eq i32 addrspace(3)* %cast0, %cast1 -define i1 @icmp_flat_to_group_cmp(i32 addrspace(4)* %flat.ptr.0, i32 addrspace(4)* %flat.ptr.1) #0 { - %cast0 = addrspacecast i32 addrspace(4)* %flat.ptr.0 to i32 addrspace(3)* - %cast1 = addrspacecast i32 addrspace(4)* %flat.ptr.1 to i32 addrspace(3)* +define i1 @icmp_flat_to_group_cmp(i32* %flat.ptr.0, i32* %flat.ptr.1) #0 { + %cast0 = addrspacecast i32* %flat.ptr.0 to i32 addrspace(3)* + %cast1 = addrspacecast i32* %flat.ptr.1 to i32 addrspace(3)* %cmp = icmp eq i32 addrspace(3)* %cast0, %cast1 ret i1 %cmp } @@ -62,35 +62,35 @@ define i1 @icmp_flat_to_group_cmp(i32 addrspace(4)* %flat.ptr.0, i32 addrspace(4 ; constant cast if this is OK to change if 0 is a valid pointer. ; CHECK-LABEL: @icmp_group_flat_cmp_null( -; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*) +; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, addrspacecast (i32* null to i32 addrspace(3)*) define i1 @icmp_group_flat_cmp_null(i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %cmp = icmp eq i32 addrspace(4)* %cast0, null + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %cmp = icmp eq i32* %cast0, null ret i1 %cmp } ; CHECK-LABEL: @icmp_group_flat_cmp_constant_inttoptr( -; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, addrspacecast (i32 addrspace(4)* inttoptr (i64 400 to i32 addrspace(4)*) to i32 addrspace(3)*) +; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, addrspacecast (i32* inttoptr (i64 400 to i32*) to i32 addrspace(3)*) define i1 @icmp_group_flat_cmp_constant_inttoptr(i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %cmp = icmp eq i32 addrspace(4)* %cast0, inttoptr (i64 400 to i32 addrspace(4)*) + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %cmp = icmp eq i32* %cast0, inttoptr (i64 400 to i32*) ret i1 %cmp } ; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_null( -; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* -; CHECK: %cmp = icmp eq i32 addrspace(4)* %1, addrspacecast (i32* null to i32 addrspace(4)*) +; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* +; CHECK: %cmp = icmp eq i32* %1, addrspacecast (i32 addrspace(5)* null to i32*) define i1 @icmp_mismatch_flat_group_private_cmp_null(i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %cmp = icmp eq i32 addrspace(4)* %cast0, addrspacecast (i32* null to i32 addrspace(4)*) + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %cmp = icmp eq i32* %cast0, addrspacecast (i32 addrspace(5)* null to i32*) ret i1 %cmp } ; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_undef( ; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, undef define i1 @icmp_mismatch_flat_group_private_cmp_undef(i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %cmp = icmp eq i32 addrspace(4)* %cast0, addrspacecast (i32* undef to i32 addrspace(4)*) + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %cmp = icmp eq i32* %cast0, addrspacecast (i32 addrspace(5)* undef to i32*) ret i1 %cmp } @@ -98,62 +98,62 @@ define i1 @icmp_mismatch_flat_group_private_cmp_undef(i32 addrspace(3)* %group.p @global0 = internal addrspace(1) global i32 0, align 4 ; CHECK-LABEL: 
@icmp_mismatch_flat_group_global_cmp_gv( -; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* -; CHECK: %cmp = icmp eq i32 addrspace(4)* %1, addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*) +; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* +; CHECK: %cmp = icmp eq i32* %1, addrspacecast (i32 addrspace(1)* @global0 to i32*) define i1 @icmp_mismatch_flat_group_global_cmp_gv(i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %cmp = icmp eq i32 addrspace(4)* %cast0, addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*) + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %cmp = icmp eq i32* %cast0, addrspacecast (i32 addrspace(1)* @global0 to i32*) ret i1 %cmp } ; CHECK-LABEL: @icmp_mismatch_group_global_cmp_gv_gv( -; CHECK: %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*) +; CHECK: %cmp = icmp eq i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), addrspacecast (i32 addrspace(1)* @global0 to i32*) define i1 @icmp_mismatch_group_global_cmp_gv_gv(i32 addrspace(3)* %group.ptr.0) #0 { - %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*) + %cmp = icmp eq i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), addrspacecast (i32 addrspace(1)* @global0 to i32*) ret i1 %cmp } ; CHECK-LABEL: @icmp_group_flat_cmp_undef( ; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, undef define i1 @icmp_group_flat_cmp_undef(i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %cmp = icmp eq i32 addrspace(4)* %cast0, undef + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %cmp = icmp eq i32* %cast0, undef ret i1 %cmp } ; Test non-canonical orders ; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_null_swap( -; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* -; CHECK: %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*), %1 +; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* +; CHECK: %cmp = icmp eq i32* addrspacecast (i32 addrspace(5)* null to i32*), %1 define i1 @icmp_mismatch_flat_group_private_cmp_null_swap(i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*), %cast0 + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %cmp = icmp eq i32* addrspacecast (i32 addrspace(5)* null to i32*), %cast0 ret i1 %cmp } ; CHECK-LABEL: @icmp_group_flat_cmp_undef_swap( ; CHECK: %cmp = icmp eq i32 addrspace(3)* undef, %group.ptr.0 define i1 @icmp_group_flat_cmp_undef_swap(i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %cmp = icmp eq i32 addrspace(4)* undef, %cast0 + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %cmp = icmp eq i32* undef, %cast0 ret i1 %cmp } ; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_undef_swap( ; CHECK: %cmp = icmp eq i32 addrspace(3)* undef, %group.ptr.0 define i1 @icmp_mismatch_flat_group_private_cmp_undef_swap(i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %cmp = icmp eq i32 
addrspace(4)* addrspacecast (i32* undef to i32 addrspace(4)*), %cast0 + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %cmp = icmp eq i32* addrspacecast (i32 addrspace(5)* undef to i32*), %cast0 ret i1 %cmp } ; TODO: Should be handled ; CHECK-LABEL: @icmp_flat_flat_from_group_vector( -; CHECK: %cmp = icmp eq <2 x i32 addrspace(4)*> %cast0, %cast1 +; CHECK: %cmp = icmp eq <2 x i32*> %cast0, %cast1 define <2 x i1> @icmp_flat_flat_from_group_vector(<2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 { - %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32 addrspace(4)*> - %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32 addrspace(4)*> - %cmp = icmp eq <2 x i32 addrspace(4)*> %cast0, %cast1 + %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*> + %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*> + %cmp = icmp eq <2 x i32*> %cast0, %cast1 ret <2 x i1> %cmp } diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll index 52067cd37bb..3096d8144dc 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll @@ -30,29 +30,29 @@ ; CHECK: ret void define amdgpu_kernel void @load_store_lds_f32(i32 %i, float %v) #0 { bb: - %tmp = load float, float addrspace(4)* addrspacecast (float addrspace(3)* @scalar to float addrspace(4)*), align 4 + %tmp = load float, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4 call void @use(float %tmp) - store float %v, float addrspace(4)* addrspacecast (float addrspace(3)* @scalar to float addrspace(4)*), align 4 + store float %v, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4 call void @llvm.amdgcn.s.barrier() - %tmp1 = addrspacecast float addrspace(3)* @scalar to float addrspace(4)* - %tmp2 = load float, float addrspace(4)* %tmp1, align 4 + %tmp1 = addrspacecast float addrspace(3)* @scalar to float* + %tmp2 = load float, float* %tmp1, align 4 call void @use(float %tmp2) - store float %v, float addrspace(4)* %tmp1, align 4 + store float %v, float* %tmp1, align 4 call void @llvm.amdgcn.s.barrier() - %tmp3 = load float, float addrspace(4)* getelementptr inbounds ([10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i32 0, i32 5), align 4 + %tmp3 = load float, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4 call void @use(float %tmp3) - store float %v, float addrspace(4)* getelementptr inbounds ([10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i32 0, i32 5), align 4 + store float %v, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4 call void @llvm.amdgcn.s.barrier() - %tmp4 = getelementptr inbounds [10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i32 0, i32 5 - %tmp5 = load float, float addrspace(4)* %tmp4, align 4 + %tmp4 = getelementptr inbounds [10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5 + %tmp5 = load float, float* %tmp4, align 4 
call void @use(float %tmp5) - store float %v, float addrspace(4)* %tmp4, align 4 + store float %v, float* %tmp4, align 4 call void @llvm.amdgcn.s.barrier() - %tmp6 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float] addrspace(4)* - %tmp7 = getelementptr inbounds [10 x float], [10 x float] addrspace(4)* %tmp6, i32 0, i32 %i - %tmp8 = load float, float addrspace(4)* %tmp7, align 4 + %tmp6 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float]* + %tmp7 = getelementptr inbounds [10 x float], [10 x float]* %tmp6, i32 0, i32 %i + %tmp8 = load float, float* %tmp7, align 4 call void @use(float %tmp8) - store float %v, float addrspace(4)* %tmp7, align 4 + store float %v, float* %tmp7, align 4 call void @llvm.amdgcn.s.barrier() ret void } @@ -61,7 +61,7 @@ bb: ; CHECK: %tmp = load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*), align 4 define i32 @constexpr_load_int_from_float_lds() #0 { bb: - %tmp = load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*) to i32 addrspace(4)*), align 4 + %tmp = load i32, i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*) to i32*), align 4 ret i32 %tmp } @@ -73,18 +73,18 @@ bb: ; CHECK: ret i32 %tmp4 define i32 @load_int_from_global_float(float addrspace(1)* %input, i32 %i, i32 %j) #0 { bb: - %tmp = addrspacecast float addrspace(1)* %input to float addrspace(4)* - %tmp1 = getelementptr float, float addrspace(4)* %tmp, i32 %i - %tmp2 = getelementptr float, float addrspace(4)* %tmp1, i32 %j - %tmp3 = bitcast float addrspace(4)* %tmp2 to i32 addrspace(4)* - %tmp4 = load i32, i32 addrspace(4)* %tmp3 + %tmp = addrspacecast float addrspace(1)* %input to float* + %tmp1 = getelementptr float, float* %tmp, i32 %i + %tmp2 = getelementptr float, float* %tmp1, i32 %j + %tmp3 = bitcast float* %tmp2 to i32* + %tmp4 = load i32, i32* %tmp3 ret i32 %tmp4 } ; CHECK-LABEL: @nested_const_expr( ; CHECK: store i32 1, i32 addrspace(3)* bitcast (float addrspace(3)* getelementptr inbounds ([10 x float], [10 x float] addrspace(3)* @array, i64 0, i64 1) to i32 addrspace(3)*), align 4 define amdgpu_kernel void @nested_const_expr() #0 { - store i32 1, i32 addrspace(4)* bitcast (float addrspace(4)* getelementptr ([10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i64 0, i64 1) to i32 addrspace(4)*), align 4 + store i32 1, i32* bitcast (float* getelementptr ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i64 0, i64 1) to i32*), align 4 ret void } @@ -95,10 +95,10 @@ define amdgpu_kernel void @nested_const_expr() #0 { ; CHECK-NEXT: ret void define amdgpu_kernel void @rauw(float addrspace(1)* %input) #0 { bb: - %generic_input = addrspacecast float addrspace(1)* %input to float addrspace(4)* - %addr = getelementptr float, float addrspace(4)* %generic_input, i64 10 - %v = load float, float addrspace(4)* %addr - store float %v, float addrspace(4)* %addr + %generic_input = addrspacecast float addrspace(1)* %input to float* + %addr = getelementptr float, float* %generic_input, i64 10 + %v = load float, float* %addr + store float %v, float* %addr ret void } @@ -119,27 +119,27 @@ bb: ; CHECK: br i1 %exit_cond, label %exit, label %loop define amdgpu_kernel void @loop() #0 { entry: - %p = addrspacecast [10 x float] addrspace(3)* @array to float addrspace(4)* - %end = getelementptr float, float addrspace(4)* %p, i64 10 + %p 
= addrspacecast [10 x float] addrspace(3)* @array to float* + %end = getelementptr float, float* %p, i64 10 br label %loop loop: ; preds = %loop, %entry - %i = phi float addrspace(4)* [ %p, %entry ], [ %i2, %loop ] - %v = load float, float addrspace(4)* %i + %i = phi float* [ %p, %entry ], [ %i2, %loop ] + %v = load float, float* %i call void @use(float %v) - %i2 = getelementptr float, float addrspace(4)* %i, i64 1 - %exit_cond = icmp eq float addrspace(4)* %i2, %end + %i2 = getelementptr float, float* %i, i64 1 + %exit_cond = icmp eq float* %i2, %end br i1 %exit_cond, label %exit, label %loop exit: ; preds = %loop ret void } -@generic_end = external addrspace(1) global float addrspace(4)* +@generic_end = external addrspace(1) global float* ; CHECK-LABEL: @loop_with_generic_bound( ; CHECK: %p = bitcast [10 x float] addrspace(3)* @array to float addrspace(3)* -; CHECK: %end = load float addrspace(4)*, float addrspace(4)* addrspace(1)* @generic_end +; CHECK: %end = load float*, float* addrspace(1)* @generic_end ; CHECK: br label %loop ; CHECK: loop: @@ -147,21 +147,21 @@ exit: ; preds = %loop ; CHECK: %v = load float, float addrspace(3)* %i ; CHECK: call void @use(float %v) ; CHECK: %i2 = getelementptr float, float addrspace(3)* %i, i64 1 -; CHECK: %0 = addrspacecast float addrspace(3)* %i2 to float addrspace(4)* -; CHECK: %exit_cond = icmp eq float addrspace(4)* %0, %end +; CHECK: %0 = addrspacecast float addrspace(3)* %i2 to float* +; CHECK: %exit_cond = icmp eq float* %0, %end ; CHECK: br i1 %exit_cond, label %exit, label %loop define amdgpu_kernel void @loop_with_generic_bound() #0 { entry: - %p = addrspacecast [10 x float] addrspace(3)* @array to float addrspace(4)* - %end = load float addrspace(4)*, float addrspace(4)* addrspace(1)* @generic_end + %p = addrspacecast [10 x float] addrspace(3)* @array to float* + %end = load float*, float* addrspace(1)* @generic_end br label %loop loop: ; preds = %loop, %entry - %i = phi float addrspace(4)* [ %p, %entry ], [ %i2, %loop ] - %v = load float, float addrspace(4)* %i + %i = phi float* [ %p, %entry ], [ %i2, %loop ] + %v = load float, float* %i call void @use(float %v) - %i2 = getelementptr float, float addrspace(4)* %i, i64 1 - %exit_cond = icmp eq float addrspace(4)* %i2, %end + %i2 = getelementptr float, float* %i, i64 1 + %exit_cond = icmp eq float* %i2, %end br i1 %exit_cond, label %exit, label %loop exit: ; preds = %loop diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll index 74a2595252d..2d4bf148d84 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll @@ -8,9 +8,9 @@ ; CHECK-NEXT: store i32 8, i32 addrspace(3)* %gep0, align 8 ; CHECK-NEXT: ret void define void @addrspacecast_gep_addrspacecast(i32 addrspace(3)* %ptr) { - %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* - %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9 - %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)* + %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32* + %gep0 = getelementptr i32, i32* %asc0, i64 9 + %asc1 = addrspacecast i32* %gep0 to i32 addrspace(3)* store i32 8, i32 addrspace(3)* %asc1, align 8 ret void } @@ -21,9 +21,9 @@ define void @addrspacecast_gep_addrspacecast(i32 addrspace(3)* %ptr) { ; CHECK-NEXT: store i8 8, i8 addrspace(3)* [[CAST]], align 8 ; CHECK-NEXT: ret void define void 
@addrspacecast_different_pointee_type(i32 addrspace(3)* %ptr) { - %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* - %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9 - %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i8 addrspace(3)* + %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32* + %gep0 = getelementptr i32, i32* %asc0, i64 9 + %asc1 = addrspacecast i32* %gep0 to i8 addrspace(3)* store i8 8, i8 addrspace(3)* %asc1, align 8 ret void } @@ -33,24 +33,24 @@ define void @addrspacecast_different_pointee_type(i32 addrspace(3)* %ptr) { ; CHECK-NEXT: store volatile i32 addrspace(3)* %gep0, i32 addrspace(3)* addrspace(1)* undef ; CHECK-NEXT: ret void define void @addrspacecast_to_memory(i32 addrspace(3)* %ptr) { - %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* - %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9 - %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)* + %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32* + %gep0 = getelementptr i32, i32* %asc0, i64 9 + %asc1 = addrspacecast i32* %gep0 to i32 addrspace(3)* store volatile i32 addrspace(3)* %asc1, i32 addrspace(3)* addrspace(1)* undef ret void } ; CHECK-LABEL: @multiuse_addrspacecast_gep_addrspacecast( -; CHECK: %1 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* -; CHECK-NEXT: store volatile i32 addrspace(4)* %1, i32 addrspace(4)* addrspace(1)* undef +; CHECK: %1 = addrspacecast i32 addrspace(3)* %ptr to i32* +; CHECK-NEXT: store volatile i32* %1, i32* addrspace(1)* undef ; CHECK-NEXT: %gep0 = getelementptr i32, i32 addrspace(3)* %ptr, i64 9 ; CHECK-NEXT: store i32 8, i32 addrspace(3)* %gep0, align 8 ; CHECK-NEXT: ret void define void @multiuse_addrspacecast_gep_addrspacecast(i32 addrspace(3)* %ptr) { - %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* - store volatile i32 addrspace(4)* %asc0, i32 addrspace(4)* addrspace(1)* undef - %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9 - %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)* + %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32* + store volatile i32* %asc0, i32* addrspace(1)* undef + %gep0 = getelementptr i32, i32* %asc0, i64 9 + %asc1 = addrspacecast i32* %gep0 to i32 addrspace(3)* store i32 8, i32 addrspace(3)* %asc1, align 8 ret void } diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll index e2c255dcb3e..f9b788f07fd 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll @@ -9,8 +9,8 @@ ; CHECK: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0 ; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8 define void @simplified_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) { - %gep0 = getelementptr inbounds double, double addrspace(4)* addrspacecast (double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384) to double addrspace(4)*), i64 %idx0 - %asc = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)* + %gep0 = getelementptr inbounds double, double* addrspacecast (double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384) to double*), i64 %idx0 + %asc = addrspacecast double* %gep0 to double 
addrspace(3)* store double 1.000000e+00, double addrspace(3)* %asc, align 8 ret void } @@ -19,8 +19,8 @@ define void @simplified_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) { ; CHECK-NEXT: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0 ; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8 define void @constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) { - %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0 - %asc = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)* + %gep0 = getelementptr inbounds double, double* getelementptr ([648 x double], [648 x double]* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double]*), i64 0, i64 384), i64 %idx0 + %asc = addrspacecast double* %gep0 to double addrspace(3)* store double 1.0, double addrspace(3)* %asc, align 8 ret void } @@ -30,27 +30,27 @@ define void @constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) { ; CHECK-NEXT: %gep1 = getelementptr inbounds double, double addrspace(3)* %gep0, i64 %idx1 ; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep1, align 8 define void @constexpr_gep_gep_addrspacecast(i64 %idx0, i64 %idx1) { - %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0 - %gep1 = getelementptr inbounds double, double addrspace(4)* %gep0, i64 %idx1 - %asc = addrspacecast double addrspace(4)* %gep1 to double addrspace(3)* + %gep0 = getelementptr inbounds double, double* getelementptr ([648 x double], [648 x double]* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double]*), i64 0, i64 384), i64 %idx0 + %gep1 = getelementptr inbounds double, double* %gep0, i64 %idx1 + %asc = addrspacecast double* %gep1 to double addrspace(3)* store double 1.0, double addrspace(3)* %asc, align 8 ret void } ; Don't crash ; CHECK-LABEL: @vector_gep( -; CHECK: %cast = addrspacecast <4 x [1024 x i32] addrspace(3)*> %array to <4 x [1024 x i32] addrspace(4)*> +; CHECK: %cast = addrspacecast <4 x [1024 x i32] addrspace(3)*> %array to <4 x [1024 x i32]*> define amdgpu_kernel void @vector_gep(<4 x [1024 x i32] addrspace(3)*> %array) nounwind { - %cast = addrspacecast <4 x [1024 x i32] addrspace(3)*> %array to <4 x [1024 x i32] addrspace(4)*> - %p = getelementptr [1024 x i32], <4 x [1024 x i32] addrspace(4)*> %cast, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16> - %p0 = extractelement <4 x i32 addrspace(4)*> %p, i32 0 - %p1 = extractelement <4 x i32 addrspace(4)*> %p, i32 1 - %p2 = extractelement <4 x i32 addrspace(4)*> %p, i32 2 - %p3 = extractelement <4 x i32 addrspace(4)*> %p, i32 3 - store i32 99, i32 addrspace(4)* %p0 - store i32 99, i32 addrspace(4)* %p1 - store i32 99, i32 addrspace(4)* %p2 - store i32 99, i32 addrspace(4)* %p3 + %cast = addrspacecast <4 x [1024 x i32] addrspace(3)*> %array to <4 x [1024 x i32]*> + %p = getelementptr [1024 x i32], <4 x [1024 x i32]*> %cast, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16> + %p0 = extractelement <4 x i32*> %p, i32 0 + %p1 = extractelement <4 x i32*> %p, i32 1 + %p2 = extractelement <4 x i32*> %p, i32 2 + %p3 = extractelement <4 x 
i32*> %p, i32 3 + store i32 99, i32* %p0 + store i32 99, i32* %p1 + store i32 99, i32* %p2 + store i32 99, i32* %p3 ret void } @@ -61,12 +61,12 @@ define amdgpu_kernel void @vector_gep(<4 x [1024 x i32] addrspace(3)*> %array) n ; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep1, align 8 ; CHECK-NEXT: ret void define void @repeated_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) { - %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0 - %asc0 = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)* + %gep0 = getelementptr inbounds double, double* getelementptr ([648 x double], [648 x double]* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double]*), i64 0, i64 384), i64 %idx0 + %asc0 = addrspacecast double* %gep0 to double addrspace(3)* store double 1.0, double addrspace(3)* %asc0, align 8 - %gep1 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx1 - %asc1 = addrspacecast double addrspace(4)* %gep1 to double addrspace(3)* + %gep1 = getelementptr inbounds double, double* getelementptr ([648 x double], [648 x double]* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double]*), i64 0, i64 384), i64 %idx1 + %asc1 = addrspacecast double* %gep1 to double addrspace(3)* store double 1.0, double addrspace(3)* %asc1, align 8 ret void diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll index ca6138d3fb0..723ce41588a 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll @@ -3,143 +3,143 @@ ; CHECK-LABEL: @objectsize_group_to_flat_i32( ; CHECK: %val = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %group.ptr, i1 true, i1 false) define i32 @objectsize_group_to_flat_i32(i8 addrspace(3)* %group.ptr) #0 { - %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* - %val = call i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)* %cast, i1 true, i1 false) + %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8* + %val = call i32 @llvm.objectsize.i32.p0i8(i8* %cast, i1 true, i1 false) ret i32 %val } ; CHECK-LABEL: @objectsize_global_to_flat_i64( ; CHECK: %val = call i64 @llvm.objectsize.i64.p3i8(i8 addrspace(3)* %global.ptr, i1 true, i1 false) define i64 @objectsize_global_to_flat_i64(i8 addrspace(3)* %global.ptr) #0 { - %cast = addrspacecast i8 addrspace(3)* %global.ptr to i8 addrspace(4)* - %val = call i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)* %cast, i1 true, i1 false) + %cast = addrspacecast i8 addrspace(3)* %global.ptr to i8* + %val = call i64 @llvm.objectsize.i64.p0i8(i8* %cast, i1 true, i1 false) ret i64 %val } ; CHECK-LABEL: @atomicinc_global_to_flat_i32( ; CHECK: call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %y, i32 0, i32 0, i1 false) define i32 @atomicinc_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %y) #0 { - %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)* - %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y, i32 0, i32 0, i1 false) + %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32* + %ret = call i32 
@llvm.amdgcn.atomic.inc.i32.p0i32(i32* %cast, i32 %y, i32 0, i32 0, i1 false) ret i32 %ret } ; CHECK-LABEL: @atomicinc_group_to_flat_i32( ; CHECK: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %y, i32 0, i32 0, i1 false) define i32 @atomicinc_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %y) #0 { - %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)* - %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y, i32 0, i32 0, i1 false) + %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32* + %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %cast, i32 %y, i32 0, i32 0, i1 false) ret i32 %ret } ; CHECK-LABEL: @atomicinc_global_to_flat_i64( ; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y, i32 0, i32 0, i1 false) define i64 @atomicinc_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 { - %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)* - %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 false) + %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64* + %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 false) ret i64 %ret } ; CHECK-LABEL: @atomicinc_group_to_flat_i64( ; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y, i32 0, i32 0, i1 false) define i64 @atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 { - %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)* - %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 false) + %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64* + %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 false) ret i64 %ret } ; CHECK-LABEL: @atomicdec_global_to_flat_i32( ; CHECK: call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %val, i32 0, i32 0, i1 false) define i32 @atomicdec_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %val) #0 { - %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)* - %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val, i32 0, i32 0, i1 false) + %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32* + %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 false) ret i32 %ret } ; CHECK-LABEL: @atomicdec_group_to_flat_i32( ; CHECK: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %val, i32 0, i32 0, i1 false) define i32 @atomicdec_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %val) #0 { - %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)* - %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val, i32 0, i32 0, i1 false) + %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32* + %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 false) ret i32 %ret } ; CHECK-LABEL: @atomicdec_global_to_flat_i64( ; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y, i32 0, i32 0, i1 false) define i64 @atomicdec_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 { - %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)* - %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 false) + 
%cast = addrspacecast i64 addrspace(1)* %global.ptr to i64* + %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 false) ret i64 %ret } ; CHECK-LABEL: @atomicdec_group_to_flat_i64( ; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y, i32 0, i32 0, i1 false define i64 @atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 { - %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)* - %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 false) + %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64* + %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 false) ret i64 %ret } ; CHECK-LABEL: @volatile_atomicinc_group_to_flat_i64( -; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)* -; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %1, i64 %y, i32 0, i32 0, i1 true) +; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64* +; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 true) define i64 @volatile_atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 { - %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)* - %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 true) + %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64* + %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true) ret i64 %ret } ; CHECK-LABEL: @volatile_atomicdec_global_to_flat_i32( -; CHECK-NEXT: %1 = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)* -; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %1, i32 %val, i32 0, i32 0, i1 true) +; CHECK-NEXT: %1 = addrspacecast i32 addrspace(1)* %global.ptr to i32* +; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %1, i32 %val, i32 0, i32 0, i1 true) define i32 @volatile_atomicdec_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %val) #0 { - %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)* - %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val, i32 0, i32 0, i1 true) + %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32* + %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 true) ret i32 %ret } ; CHECK-LABEL: @volatile_atomicdec_group_to_flat_i32( -; CHECK-NEXT: %1 = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)* -; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %1, i32 %val, i32 0, i32 0, i1 true) +; CHECK-NEXT: %1 = addrspacecast i32 addrspace(3)* %group.ptr to i32* +; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %1, i32 %val, i32 0, i32 0, i1 true) define i32 @volatile_atomicdec_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %val) #0 { - %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)* - %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val, i32 0, i32 0, i1 true) + %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32* + %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 true) ret i32 %ret } ; CHECK-LABEL: @volatile_atomicdec_global_to_flat_i64( -; CHECK-NEXT: %1 = addrspacecast i64 addrspace(1)* %global.ptr to i64 
addrspace(4)* -; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %1, i64 %y, i32 0, i32 0, i1 true) +; CHECK-NEXT: %1 = addrspacecast i64 addrspace(1)* %global.ptr to i64* +; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 true) define i64 @volatile_atomicdec_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 { - %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)* - %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 true) + %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64* + %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true) ret i64 %ret } ; CHECK-LABEL: @volatile_atomicdec_group_to_flat_i64( -; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)* -; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %1, i64 %y, i32 0, i32 0, i1 true) +; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64* +; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 true) define i64 @volatile_atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 { - %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)* - %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 true) + %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64* + %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true) ret i64 %ret } ; CHECK-LABEL: @invalid_variable_volatile_atomicinc_group_to_flat_i64( -; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)* -; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %1, i64 %y, i32 0, i32 0, i1 %volatile.var) +; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64* +; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 %volatile.var) define i64 @invalid_variable_volatile_atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y, i1 %volatile.var) #0 { - %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)* - %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 %volatile.var) + %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64* + %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 %volatile.var) ret i64 %ret } -declare i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)*, i1, i1) #1 -declare i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)*, i1, i1) #1 -declare i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* nocapture, i32, i32, i32, i1) #2 -declare i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* nocapture, i64, i32, i32, i1) #2 -declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32, i32, i32, i1) #2 -declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64, i32, i32, i1) #2 +declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1) #1 +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1) #1 +declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2 +declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2 +declare i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* 
nocapture, i64, i32, i32, i1) #2 attributes #0 = { nounwind } attributes #1 = { nounwind readnone } diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll index dd0bbfdc6d2..d8987f8b630 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll @@ -3,100 +3,100 @@ ; CHECK-LABEL: @memset_group_to_flat( ; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* align 4 %group.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 define amdgpu_kernel void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 { - %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* - call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 + %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8* + call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void } ; CHECK-LABEL: @memset_global_to_flat( ; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* align 4 %global.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 define amdgpu_kernel void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 { - %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)* - call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 + %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8* + call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void } ; CHECK-LABEL: @memset_group_to_flat_no_md( ; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* align 4 %group.ptr, i8 4, i64 %size, i1 false){{$}} define amdgpu_kernel void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) #0 { - %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* - call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 %size, i1 false) + %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8* + call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 %size, i1 false) ret void } ; CHECK-LABEL: @memset_global_to_flat_no_md( ; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* align 4 %global.ptr, i8 4, i64 %size, i1 false){{$}} define amdgpu_kernel void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size) #0 { - %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)* - call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 %size, i1 false) + %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8* + call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 %size, i1 false) ret void } ; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group( -; CHCK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { - %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* - call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 +; CHCK: 
call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 +define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(i8* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8* + call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void } ; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group( -; CHECK: call void @llvm.memcpy.p3i8.p4i8.i64(i8 addrspace(3)* align 4 %dest.group.ptr, i8 addrspace(4)* align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size) #0 { - %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)* - call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %cast.dest, i8 addrspace(4)* align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 +; CHECK: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 4 %dest.group.ptr, i8* align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 +define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8* %src.ptr, i64 %size) #0 { + %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8* + call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %cast.dest, i8* align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void } ; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group( ; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* align 4 %src.group.ptr, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { - %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* - %cast.dest = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* - call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %cast.dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 + %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8* + %cast.dest = addrspacecast i8 addrspace(3)* %src.group.ptr to i8* + call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %cast.dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void } ; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global( ; CHECK: call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* align 4 %dest.group.ptr, i8 addrspace(1)* align 4 %src.global.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size) #0 { - %cast.src = addrspacecast i8 addrspace(1)* %src.global.ptr to i8 addrspace(4)* - %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)* - call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %cast.dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 + %cast.src = addrspacecast i8 addrspace(1)* %src.global.ptr to i8* 
+ %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8* + call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %cast.dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void } ; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global( ; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %dest.global.ptr, i8 addrspace(3)* align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size) #0 { - %cast.dest = addrspacecast i8 addrspace(1)* %dest.global.ptr to i8 addrspace(4)* - call void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* align 4 %cast.dest, i8 addrspace(3)* align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 + %cast.dest = addrspacecast i8 addrspace(1)* %dest.global.ptr to i8* + call void @llvm.memcpy.p0i8.p3i8.i32(i8* align 4 %cast.dest, i8 addrspace(3)* align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void } ; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct( -; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa.struct !7 -define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { - %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* - call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa.struct !7 +; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa.struct !7 +define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8* + call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa.struct !7 ret void } ; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md( -; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}} -define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { - %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* - call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false) +; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}} +define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8* + call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 %size, i1 false) ret void } ; CHECK-LABEL: @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md( -; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest0, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}} -; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest1, 
i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}} -define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest0, i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { - %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* - call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest0, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false) - call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest1, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false) +; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest0, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}} +; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest1, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}} +define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8* %dest0, i8* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8* + call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %dest0, i8* align 4 %cast.src, i64 %size, i1 false) + call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %dest1, i8* align 4 %cast.src, i64 %size, i1 false) ret void } @@ -104,22 +104,22 @@ define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_n ; CHECK-LABEL: @memcpy_group_flat_to_flat_self( ; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* align 4 %group.ptr, i8 addrspace(3)* align 4 %group.ptr, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 define amdgpu_kernel void @memcpy_group_flat_to_flat_self(i8 addrspace(3)* %group.ptr) #0 { - %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)* - call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 addrspace(4)* align 4 %cast, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 + %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8* + call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %cast, i8* align 4 %cast, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void } ; CHECK-LABEL: @memmove_flat_to_flat_replace_src_with_group( -; CHECK: call void @llvm.memmove.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 -define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { - %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)* - call void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 +; CHECK: call void @llvm.memmove.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 +define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(i8* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 { + %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8* + call void @llvm.memmove.p4i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4 ret void } -declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i1) #1 -declare void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i1) #1 -declare void 
@llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i32, i1) #1 -declare void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i1) #1 +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1 +declare void @llvm.memcpy.p4i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1 +declare void @llvm.memcpy.p0i8.p3i8.i32(i8* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i32, i1) #1 +declare void @llvm.memmove.p4i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1 attributes #0 = { nounwind } attributes #1 = { argmemonly nounwind } diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll index 3231b6ccf1c..2080c51b66f 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s +; RUN: opt -data-layout=A5 -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s ; Regression tests from old HSAIL addrspacecast optimization pass @@ -14,7 +14,7 @@ entry: %tmp1 = call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = zext i32 %tmp1 to i64 %tmp3 = add i64 %tmp2, %arg0 - %vecload1 = load <2 x double>, <2 x double> addrspace(4)* bitcast (double addrspace(4)* getelementptr ([100 x double], [100 x double] addrspace(4)* addrspacecast ([100 x double] addrspace(1)* @data to [100 x double] addrspace(4)*), i64 0, i64 4) to <2 x double> addrspace(4)*), align 8 + %vecload1 = load <2 x double>, <2 x double>* bitcast (double* getelementptr ([100 x double], [100 x double]* addrspacecast ([100 x double] addrspace(1)* @data to [100 x double]*), i64 0, i64 4) to <2 x double>*), align 8 %cmp = fcmp ord <2 x double> %vecload1, zeroinitializer %sext = sext <2 x i1> %cmp to <2 x i64> %tmp4 = extractelement <2 x i64> %sext, i64 0 @@ -30,7 +30,7 @@ entry: @generic_address_bug9749.val = internal addrspace(1) global float 0.0, align 4 -declare i32 @_Z9get_fencePU3AS4v(i8 addrspace(4)*) +declare i32 @_Z9get_fencePv(i8*) %opencl.pipe_t = type opaque ; This is a compile time assert bug, but we still want to check optimization @@ -53,24 +53,24 @@ entry: ; Should generate flat load ; CHECK-LABEL: @generic_address_bug9749( ; CHECK: br i1 -; CHECK: load float, float addrspace(4)* +; CHECK: load float, float* ; CHECK: br label define amdgpu_kernel void @generic_address_bug9749(i32 addrspace(1)* nocapture %results) #0 { entry: - %ptr = alloca float addrspace(4)*, align 8 + %ptr = alloca float*, align 8, addrspace(5) %tmp = call i32 @llvm.amdgcn.workitem.id.x() %tmp1 = zext i32 %tmp to i64 store float 0x3FB99999A0000000, float addrspace(1)* @generic_address_bug9749.val, align 4 - store volatile float addrspace(4)* addrspacecast (float addrspace(1)* @generic_address_bug9749.val to float addrspace(4)*), float addrspace(4)** %ptr, align 8 - %tmp2 = load volatile float addrspace(4)*, float addrspace(4)** %ptr, align 8 + store volatile float* addrspacecast (float addrspace(1)* @generic_address_bug9749.val to float*), float* addrspace(5)* %ptr, align 8 + %tmp2 = load volatile float*, float* addrspace(5)* %ptr, align 8 %tmp3 = load float, float addrspace(1)* @generic_address_bug9749.val, align 4 - %tmp4 = bitcast float addrspace(4)* %tmp2 to i8 addrspace(4)* - %call.i = 
call i32 @_Z9get_fencePU3AS4v(i8 addrspace(4)* %tmp4) #1 + %tmp4 = bitcast float* %tmp2 to i8* + %call.i = call i32 @_Z9get_fencePv(i8* %tmp4) #1 %switch.i.i = icmp ult i32 %call.i, 4 br i1 %switch.i.i, label %if.end.i, label %helperFunction.exit if.end.i: ; preds = %entry - %tmp5 = load float, float addrspace(4)* %tmp2, align 4 + %tmp5 = load float, float* %tmp2, align 4 %not.cmp.i = fcmp oeq float %tmp5, %tmp3 %phitmp = zext i1 %not.cmp.i to i32 br label %helperFunction.exit @@ -91,14 +91,14 @@ entry: br i1 %cmp1, label %for.end, label %for.body.lr.ph for.body.lr.ph: ; preds = %entry - %tmp = addrspacecast i32 addrspace(3)* %in to i32 addrspace(4)* + %tmp = addrspacecast i32 addrspace(3)* %in to i32* br label %for.body for.body: ; preds = %for.body, %for.body.lr.ph %i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] - %ptr.02 = phi i32 addrspace(4)* [ %tmp, %for.body.lr.ph ], [ %add.ptr, %for.body ] - store i32 %i.03, i32 addrspace(4)* %ptr.02, align 4 - %add.ptr = getelementptr inbounds i32, i32 addrspace(4)* %ptr.02, i64 4 + %ptr.02 = phi i32* [ %tmp, %for.body.lr.ph ], [ %add.ptr, %for.body ] + store i32 %i.03, i32* %ptr.02, align 4 + %add.ptr = getelementptr inbounds i32, i32* %ptr.02, i64 4 %inc = add nuw i32 %i.03, 1 %exitcond = icmp eq i32 %inc, %numElems br i1 %exitcond, label %for.end, label %for.body @@ -116,23 +116,23 @@ entry: %tmp2 = zext i32 %tmp1 to i64 %tmp3 = add i64 %tmp2, %arg0 %sext = shl i64 %tmp3, 32 - %tmp4 = addrspacecast i32 addrspace(3)* %destValues to i32 addrspace(4)* - %tmp5 = addrspacecast i32 addrspace(3)* %sourceA to i32 addrspace(4)* + %tmp4 = addrspacecast i32 addrspace(3)* %destValues to i32* + %tmp5 = addrspacecast i32 addrspace(3)* %sourceA to i32* %tmp6 = ashr exact i64 %sext, 31 - %tmp7 = getelementptr inbounds i32, i32 addrspace(4)* %tmp5, i64 %tmp6 - %arrayidx_v4 = bitcast i32 addrspace(4)* %tmp7 to <2 x i32> addrspace(4)* - %vecload = load <2 x i32>, <2 x i32> addrspace(4)* %arrayidx_v4, align 4 + %tmp7 = getelementptr inbounds i32, i32* %tmp5, i64 %tmp6 + %arrayidx_v4 = bitcast i32* %tmp7 to <2 x i32>* + %vecload = load <2 x i32>, <2 x i32>* %arrayidx_v4, align 4 %tmp8 = extractelement <2 x i32> %vecload, i32 0 %tmp9 = extractelement <2 x i32> %vecload, i32 1 %tmp10 = icmp eq i32 %tmp8, 0 %tmp11 = select i1 %tmp10, i32 32, i32 %tmp8 %tmp12 = icmp eq i32 %tmp9, 0 %tmp13 = select i1 %tmp12, i32 32, i32 %tmp9 - %tmp14 = getelementptr inbounds i32, i32 addrspace(4)* %tmp4, i64 %tmp6 + %tmp14 = getelementptr inbounds i32, i32* %tmp4, i64 %tmp6 %tmp15 = insertelement <2 x i32> undef, i32 %tmp11, i32 0 %tmp16 = insertelement <2 x i32> %tmp15, i32 %tmp13, i32 1 - %arrayidx_v41 = bitcast i32 addrspace(4)* %tmp14 to <2 x i32> addrspace(4)* - store <2 x i32> %tmp16, <2 x i32> addrspace(4)* %arrayidx_v41, align 4 + %arrayidx_v41 = bitcast i32* %tmp14 to <2 x i32>* + store <2 x i32> %tmp16, <2 x i32>* %arrayidx_v41, align 4 ret void } @@ -140,4 +140,4 @@ declare i32 @llvm.amdgcn.workitem.id.x() #2 attributes #0 = { nounwind } attributes #1 = { nounwind readonly } -attributes #2 = { nounwind readnone }
\ No newline at end of file +attributes #2 = { nounwind readnone } diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll index 08edc20ecf9..598bb68dc29 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll @@ -4,25 +4,25 @@ ; this doesn't do something insane on non-canonical IR. ; CHECK-LABEL: @return_select_group_flat( -; CHECK-NEXT: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* -; CHECK-NEXT: %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)* -; CHECK-NEXT: %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1 -; CHECK-NEXT: ret i32 addrspace(4)* %select -define i32 addrspace(4)* @return_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)* - %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1 - ret i32 addrspace(4)* %select +; CHECK-NEXT: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* +; CHECK-NEXT: %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32* +; CHECK-NEXT: %select = select i1 %c, i32* %cast0, i32* %cast1 +; CHECK-NEXT: ret i32* %select +define i32* @return_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 { + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32* + %select = select i1 %c, i32* %cast0, i32* %cast1 + ret i32* %select } ; CHECK-LABEL: @store_select_group_flat( ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1 ; CHECK: store i32 -1, i32 addrspace(3)* %select define amdgpu_kernel void @store_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)* - %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1 - store i32 -1, i32 addrspace(4)* %select + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32* + %select = select i1 %c, i32* %cast0, i32* %cast1 + store i32 -1, i32* %select ret void } @@ -31,23 +31,23 @@ define amdgpu_kernel void @store_select_group_flat(i1 %c, i32 addrspace(3)* %gro ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1, !prof !0 ; CHECK: %load = load i32, i32 addrspace(3)* %select define i32 @load_select_group_flat_md(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)* - %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1, !prof !0 - %load = load i32, i32 addrspace(4)* %select + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32* + %select = select i1 %c, i32* %cast0, i32* %cast1, !prof !0 + %load = load i32, i32* %select ret i32 %load } ; CHECK-LABEL: @store_select_mismatch_group_private_flat( -; CHECK: %1 = addrspacecast i32 
addrspace(3)* %group.ptr.0 to i32 addrspace(4)* -; CHECK: %2 = addrspacecast i32* %private.ptr.1 to i32 addrspace(4)* -; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* %2 -; CHECK: store i32 -1, i32 addrspace(4)* %select -define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32* %private.ptr.1) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %cast1 = addrspacecast i32* %private.ptr.1 to i32 addrspace(4)* - %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1 - store i32 -1, i32 addrspace(4)* %select +; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* +; CHECK: %2 = addrspacecast i32 addrspace(5)* %private.ptr.1 to i32* +; CHECK: %select = select i1 %c, i32* %1, i32* %2 +; CHECK: store i32 -1, i32* %select +define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(5)* %private.ptr.1) #0 { + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %cast1 = addrspacecast i32 addrspace(5)* %private.ptr.1 to i32* + %select = select i1 %c, i32* %cast0, i32* %cast1 + store i32 -1, i32* %select ret void } @@ -58,35 +58,35 @@ define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 a ; CHECK: %tmp = load i32, i32 addrspace(3)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(3)* @lds0, i32 addrspace(3)* @lds1) define i32 @constexpr_select_group_flat() #0 { bb: - %tmp = load i32, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds1 to i32 addrspace(4)*)) + %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(3)* @lds1 to i32*)) ret i32 %tmp } ; CHECK-LABEL: @constexpr_select_group_global_flat_mismatch( -; CHECK: %tmp = load i32, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)) +; CHECK: %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)) define i32 @constexpr_select_group_global_flat_mismatch() #0 { bb: - %tmp = load i32, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)) + %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)) ret i32 %tmp } ; CHECK-LABEL: @store_select_group_flat_null( -; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*) +; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*) ; CHECK: store i32 -1, i32 addrspace(3)* %select 
define amdgpu_kernel void @store_select_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* null - store i32 -1, i32 addrspace(4)* %select + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %select = select i1 %c, i32* %cast0, i32* null + store i32 -1, i32* %select ret void } ; CHECK-LABEL: @store_select_group_flat_null_swap( -; CHECK: %select = select i1 %c, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), i32 addrspace(3)* %group.ptr.0 +; CHECK: %select = select i1 %c, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*), i32 addrspace(3)* %group.ptr.0 ; CHECK: store i32 -1, i32 addrspace(3)* %select define amdgpu_kernel void @store_select_group_flat_null_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %select = select i1 %c, i32 addrspace(4)* null, i32 addrspace(4)* %cast0 - store i32 -1, i32 addrspace(4)* %select + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %select = select i1 %c, i32* null, i32* %cast0 + store i32 -1, i32* %select ret void } @@ -94,9 +94,9 @@ define amdgpu_kernel void @store_select_group_flat_null_swap(i1 %c, i32 addrspac ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* undef ; CHECK: store i32 -1, i32 addrspace(3)* %select define amdgpu_kernel void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* undef - store i32 -1, i32 addrspace(4)* %select + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %select = select i1 %c, i32* %cast0, i32* undef + store i32 -1, i32* %select ret void } @@ -104,21 +104,21 @@ define amdgpu_kernel void @store_select_group_flat_undef(i1 %c, i32 addrspace(3) ; CHECK: %select = select i1 %c, i32 addrspace(3)* undef, i32 addrspace(3)* %group.ptr.0 ; CHECK: store i32 -1, i32 addrspace(3)* %select define amdgpu_kernel void @store_select_group_flat_undef_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %select = select i1 %c, i32 addrspace(4)* undef, i32 addrspace(4)* %cast0 - store i32 -1, i32 addrspace(4)* %select + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %select = select i1 %c, i32* undef, i32* %cast0 + store i32 -1, i32* %select ret void } ; CHECK-LABEL: @store_select_gep_group_flat_null( -; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*) +; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*) ; CHECK: %gep = getelementptr i32, i32 addrspace(3)* %select, i64 16 ; CHECK: store i32 -1, i32 addrspace(3)* %gep define amdgpu_kernel void @store_select_gep_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* null - %gep = getelementptr i32, i32 addrspace(4)* %select, i64 16 - store i32 -1, i32 addrspace(4)* %gep + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %select = select i1 %c, i32* 
%cast0, i32* null + %gep = getelementptr i32, i32* %select, i64 16 + store i32 -1, i32* %gep ret void } @@ -128,19 +128,19 @@ define amdgpu_kernel void @store_select_gep_group_flat_null(i1 %c, i32 addrspace ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* @lds1 ; CHECK: store i32 7, i32 addrspace(3)* %select define amdgpu_kernel void @store_select_group_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds1 to i32 addrspace(4)*) - store i32 7, i32 addrspace(4)* %select + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(3)* @lds1 to i32*) + store i32 7, i32* %select ret void } ; CHECK-LABEL: @store_select_group_flat_inttoptr_flat( -; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* inttoptr (i64 12345 to i32 addrspace(4)*) to i32 addrspace(3)*) +; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* inttoptr (i64 12345 to i32*) to i32 addrspace(3)*) ; CHECK: store i32 7, i32 addrspace(3)* %select define amdgpu_kernel void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* inttoptr (i64 12345 to i32 addrspace(4)*) - store i32 7, i32 addrspace(4)* %select + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %select = select i1 %c, i32* %cast0, i32* inttoptr (i64 12345 to i32*) + store i32 7, i32* %select ret void } @@ -148,114 +148,114 @@ define amdgpu_kernel void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addr ; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) ; CHECK-NEXT: store i32 7, i32 addrspace(3)* %select define amdgpu_kernel void @store_select_group_flat_inttoptr_group(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) to i32 addrspace(4)*) - store i32 7, i32 addrspace(4)* %select + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) to i32*) + store i32 7, i32* %select ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr( -; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* -; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*) -; CHECK: store i32 7, i32 addrspace(4)* %select +; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* +; CHECK: %select = select i1 %c, i32* %1, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*) +; CHECK: store i32 7, i32* %select define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %select = select i1 %c, i32 addrspace(4)* %cast0, i32 
addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*) - store i32 7, i32 addrspace(4)* %select + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*) + store i32 7, i32* %select ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr_swap( -; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* -; CHECK: %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*), i32 addrspace(4)* %1 -; CHECK: store i32 7, i32 addrspace(4)* %select +; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* +; CHECK: %select = select i1 %c, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*), i32* %1 +; CHECK: store i32 7, i32* %select define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*), i32 addrspace(4)* %cast0 - store i32 7, i32 addrspace(4)* %select + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %select = select i1 %c, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*), i32* %cast0 + store i32 7, i32* %select ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_null_null( -; CHECK: %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*) -; CHECK: store i32 7, i32 addrspace(4)* %select +; CHECK: %select = select i1 %c, i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*) +; CHECK: store i32 7, i32* %select define amdgpu_kernel void @store_select_group_global_mismatch_null_null(i1 %c) #0 { - %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*) - store i32 7, i32 addrspace(4)* %select + %select = select i1 %c, i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*) + store i32 7, i32* %select ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_null_null_constexpr( -; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 +; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4 define amdgpu_kernel void @store_select_group_global_mismatch_null_null_constexpr() #0 { - store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 + store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4 ret void } ; 
CHECK-LABEL: @store_select_group_global_mismatch_gv_null_constexpr( -; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 +; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4 define amdgpu_kernel void @store_select_group_global_mismatch_gv_null_constexpr() #0 { - store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 + store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4 ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_null_gv_constexpr( -; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)), align 4 +; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)), align 4 define amdgpu_kernel void @store_select_group_global_mismatch_null_gv_constexpr() #0 { - store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)), align 4 + store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)), align 4 ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_null_constexpr( -; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 +; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4 define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_null_constexpr() #0 { - store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 + store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to 
i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4 ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_flat_null_constexpr( -; CHECK: store i32 7, i32 addrspace(1)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(1)* addrspacecast (i32 addrspace(4)* inttoptr (i64 123 to i32 addrspace(4)*) to i32 addrspace(1)*), i32 addrspace(1)* null), align 4 +; CHECK: store i32 7, i32 addrspace(1)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(1)* addrspacecast (i32* inttoptr (i64 123 to i32*) to i32 addrspace(1)*), i32 addrspace(1)* null), align 4 define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_flat_null_constexpr() #0 { - store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* inttoptr (i64 123 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4 + store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* inttoptr (i64 123 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4 ret void } ; CHECK-LABEL: @store_select_group_global_mismatch_undef_undef_constexpr( ; CHECK: store i32 7, i32 addrspace(3)* null define amdgpu_kernel void @store_select_group_global_mismatch_undef_undef_constexpr() #0 { - store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* undef to i32 addrspace(4)*)), align 4 + store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* undef to i32*)), align 4 ret void } @lds2 = external addrspace(3) global [1024 x i32], align 4 ; CHECK-LABEL: @store_select_group_constexpr_ptrtoint( -; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* -; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32 addrspace(4)*) -; CHECK: store i32 7, i32 addrspace(4)* %select +; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* +; CHECK: %select = select i1 %c, i32* %1, i32* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32*) +; CHECK: store i32 7, i32* %select define amdgpu_kernel void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 { - %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)* - %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32 addrspace(4)*) - store i32 7, i32 addrspace(4)* %select + %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32* + %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32*) + store i32 7, i32* %select ret void } ; CHECK-LABEL: @store_select_group_flat_vector( -; CHECK: %cast0 = addrspacecast <2 x i32 
addrspace(3)*> %group.ptr.0 to <2 x i32 addrspace(4)*> -; CHECK: %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32 addrspace(4)*> -; CHECK: %select = select i1 %c, <2 x i32 addrspace(4)*> %cast0, <2 x i32 addrspace(4)*> %cast1 -; CHECK: %extract0 = extractelement <2 x i32 addrspace(4)*> %select, i32 0 -; CHECK: %extract1 = extractelement <2 x i32 addrspace(4)*> %select, i32 1 -; CHECK: store i32 -1, i32 addrspace(4)* %extract0 -; CHECK: store i32 -2, i32 addrspace(4)* %extract1 +; CHECK: %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*> +; CHECK: %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*> +; CHECK: %select = select i1 %c, <2 x i32*> %cast0, <2 x i32*> %cast1 +; CHECK: %extract0 = extractelement <2 x i32*> %select, i32 0 +; CHECK: %extract1 = extractelement <2 x i32*> %select, i32 1 +; CHECK: store i32 -1, i32* %extract0 +; CHECK: store i32 -2, i32* %extract1 define amdgpu_kernel void @store_select_group_flat_vector(i1 %c, <2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 { - %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32 addrspace(4)*> - %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32 addrspace(4)*> - %select = select i1 %c, <2 x i32 addrspace(4)*> %cast0, <2 x i32 addrspace(4)*> %cast1 - %extract0 = extractelement <2 x i32 addrspace(4)*> %select, i32 0 - %extract1 = extractelement <2 x i32 addrspace(4)*> %select, i32 1 - store i32 -1, i32 addrspace(4)* %extract0 - store i32 -2, i32 addrspace(4)* %extract1 + %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*> + %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*> + %select = select i1 %c, <2 x i32*> %cast0, <2 x i32*> %cast1 + %extract0 = extractelement <2 x i32*> %select, i32 0 + %extract1 = extractelement <2 x i32*> %select, i32 1 + store i32 -1, i32* %extract0 + store i32 -2, i32* %extract1 ret void } diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll index 49467cea930..6c9449cc8ff 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll @@ -3,138 +3,138 @@ ; Check that volatile users of addrspacecast are not replaced. 
; CHECK-LABEL: @volatile_load_flat_from_global( -; CHECK: load volatile i32, i32 addrspace(4)* +; CHECK: load volatile i32, i32* ; CHECK: store i32 %val, i32 addrspace(1)* define amdgpu_kernel void @volatile_load_flat_from_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { - %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)* - %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)* - %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4 - store i32 %val, i32 addrspace(4)* %tmp1, align 4 + %tmp0 = addrspacecast i32 addrspace(1)* %input to i32* + %tmp1 = addrspacecast i32 addrspace(1)* %output to i32* + %val = load volatile i32, i32* %tmp0, align 4 + store i32 %val, i32* %tmp1, align 4 ret void } ; CHECK-LABEL: @volatile_load_flat_from_constant( -; CHECK: load volatile i32, i32 addrspace(4)* +; CHECK: load volatile i32, i32* ; CHECK: store i32 %val, i32 addrspace(1)* define amdgpu_kernel void @volatile_load_flat_from_constant(i32 addrspace(2)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { - %tmp0 = addrspacecast i32 addrspace(2)* %input to i32 addrspace(4)* - %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)* - %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4 - store i32 %val, i32 addrspace(4)* %tmp1, align 4 + %tmp0 = addrspacecast i32 addrspace(2)* %input to i32* + %tmp1 = addrspacecast i32 addrspace(1)* %output to i32* + %val = load volatile i32, i32* %tmp0, align 4 + store i32 %val, i32* %tmp1, align 4 ret void } ; CHECK-LABEL: @volatile_load_flat_from_group( -; CHECK: load volatile i32, i32 addrspace(4)* +; CHECK: load volatile i32, i32* ; CHECK: store i32 %val, i32 addrspace(3)* define amdgpu_kernel void @volatile_load_flat_from_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 { - %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)* - %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)* - %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4 - store i32 %val, i32 addrspace(4)* %tmp1, align 4 + %tmp0 = addrspacecast i32 addrspace(3)* %input to i32* + %tmp1 = addrspacecast i32 addrspace(3)* %output to i32* + %val = load volatile i32, i32* %tmp0, align 4 + store i32 %val, i32* %tmp1, align 4 ret void } ; CHECK-LABEL: @volatile_load_flat_from_private( -; CHECK: load volatile i32, i32 addrspace(4)* -; CHECK: store i32 %val, i32* -define amdgpu_kernel void @volatile_load_flat_from_private(i32* nocapture %input, i32* nocapture %output) #0 { - %tmp0 = addrspacecast i32* %input to i32 addrspace(4)* - %tmp1 = addrspacecast i32* %output to i32 addrspace(4)* - %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4 - store i32 %val, i32 addrspace(4)* %tmp1, align 4 +; CHECK: load volatile i32, i32* +; CHECK: store i32 %val, i32 addrspace(5)* +define amdgpu_kernel void @volatile_load_flat_from_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 { + %tmp0 = addrspacecast i32 addrspace(5)* %input to i32* + %tmp1 = addrspacecast i32 addrspace(5)* %output to i32* + %val = load volatile i32, i32* %tmp0, align 4 + store i32 %val, i32* %tmp1, align 4 ret void } ; CHECK-LABEL: @volatile_store_flat_to_global( ; CHECK: load i32, i32 addrspace(1)* -; CHECK: store volatile i32 %val, i32 addrspace(4)* +; CHECK: store volatile i32 %val, i32* define amdgpu_kernel void @volatile_store_flat_to_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 { - %tmp0 = addrspacecast i32 
addrspace(1)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store volatile i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store volatile i32 %val, i32* %tmp1, align 4
ret void
}

; CHECK-LABEL: @volatile_store_flat_to_group(
; CHECK: load i32, i32 addrspace(3)*
-; CHECK: store volatile i32 %val, i32 addrspace(4)*
+; CHECK: store volatile i32 %val, i32*
define amdgpu_kernel void @volatile_store_flat_to_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store volatile i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store volatile i32 %val, i32* %tmp1, align 4
ret void
}

; CHECK-LABEL: @volatile_store_flat_to_private(
-; CHECK: load i32, i32*
-; CHECK: store volatile i32 %val, i32 addrspace(4)*
-define amdgpu_kernel void @volatile_store_flat_to_private(i32* nocapture %input, i32* nocapture %output) #0 {
- %tmp0 = addrspacecast i32* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store volatile i32 %val, i32 addrspace(4)* %tmp1, align 4
+; CHECK: load i32, i32 addrspace(5)*
+; CHECK: store volatile i32 %val, i32*
+define amdgpu_kernel void @volatile_store_flat_to_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
+ %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store volatile i32 %val, i32* %tmp1, align 4
ret void
}

; CHECK-LABEL: @volatile_atomicrmw_add_group_to_flat(
-; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
-; CHECK: atomicrmw volatile add i32 addrspace(4)*
+; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32*
+; CHECK: atomicrmw volatile add i32*
define i32 @volatile_atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = atomicrmw volatile add i32 addrspace(4)* %cast, i32 %y seq_cst
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = atomicrmw volatile add i32* %cast, i32 %y seq_cst
ret i32 %ret
}

; CHECK-LABEL: @volatile_atomicrmw_add_global_to_flat(
-; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
-; CHECK: %ret = atomicrmw volatile add i32 addrspace(4)*
+; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32*
+; CHECK: %ret = atomicrmw volatile add i32*
define i32 @volatile_atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = atomicrmw volatile add i32 addrspace(4)* %cast, i32 %y seq_cst
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = atomicrmw volatile add i32* %cast, i32 %y seq_cst
ret i32 %ret
}

; CHECK-LABEL: @volatile_cmpxchg_global_to_flat(
-; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
-; CHECK: cmpxchg volatile i32 addrspace(4)*
+; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32*
+; CHECK: cmpxchg volatile i32*
define { i32, i1 } @volatile_cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = cmpxchg volatile i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = cmpxchg volatile i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
ret { i32, i1 } %ret
}

; CHECK-LABEL: @volatile_cmpxchg_group_to_flat(
-; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
-; CHECK: cmpxchg volatile i32 addrspace(4)*
+; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32*
+; CHECK: cmpxchg volatile i32*
define { i32, i1 } @volatile_cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = cmpxchg volatile i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = cmpxchg volatile i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
ret { i32, i1 } %ret
}

; FIXME: Shouldn't be losing names
; CHECK-LABEL: @volatile_memset_group_to_flat(
-; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
-; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %1, i8 4, i64 32, i1 true)
+; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 32, i1 true)
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
ret void
}

; CHECK-LABEL: @volatile_memset_global_to_flat(
-; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
-; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %1, i8 4, i64 32, i1 true)
+; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 32, i1 true)
+ %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
ret void
}

-declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i1) #1
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }

diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll
index 368dc6ab361..87acb1057af 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll
@@ -1,38 +1,37 @@
-; RUN: opt -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
-; RUN: opt -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
+; RUN: opt -data-layout=A5 -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
+; RUN: opt -data-layout=A5 -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"

; ALL-LABEL: @load_unknown_offset_align1_i8(
; ALL: alloca [128 x i8], align 1
-; UNALIGNED: load <2 x i8>, <2 x i8>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: load <2 x i8>, <2 x i8> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: load i8, i8* %ptr0, align 1{{$}}
-; ALIGNED: load i8, i8* %ptr1, align 1{{$}}
+; ALIGNED: load i8, i8 addrspace(5)* %ptr0, align 1{{$}}
+; ALIGNED: load i8, i8 addrspace(5)* %ptr1, align 1{{$}}
define amdgpu_kernel void @load_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i8], align 1
- %ptr0 = getelementptr inbounds [128 x i8], [128 x i8]* %alloca, i32 0, i32 %offset
- %val0 = load i8, i8* %ptr0, align 1
- %ptr1 = getelementptr inbounds i8, i8* %ptr0, i32 1
- %val1 = load i8, i8* %ptr1, align 1
+ %alloca = alloca [128 x i8], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i8], [128 x i8] addrspace(5)* %alloca, i32 0, i32 %offset
+ %val0 = load i8, i8 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i8, i8 addrspace(5)* %ptr0, i32 1
+ %val1 = load i8, i8 addrspace(5)* %ptr1, align 1
%add = add i8 %val0, %val1
store i8 %add, i8 addrspace(1)* %out
ret void
}

; ALL-LABEL: @load_unknown_offset_align1_i16(
-; ALL: alloca [128 x i16], align 1{{$}}
-; UNALIGNED: load <2 x i16>, <2 x i16>* %{{[0-9]+}}, align 1{{$}}
+; ALL: alloca [128 x i16], align 1, addrspace(5){{$}}
+; UNALIGNED: load <2 x i16>, <2 x i16> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: load i16, i16* %ptr0, align 1{{$}}
-; ALIGNED: load i16, i16* %ptr1, align 1{{$}}
+; ALIGNED: load i16, i16 addrspace(5)* %ptr0, align 1{{$}}
+; ALIGNED: load i16, i16 addrspace(5)* %ptr1, align 1{{$}}
define amdgpu_kernel void @load_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i16], align 1
- %ptr0 = getelementptr inbounds [128 x i16], [128 x i16]* %alloca, i32 0, i32 %offset
- %val0 = load i16, i16* %ptr0, align 1
- %ptr1 = getelementptr inbounds i16, i16* %ptr0, i32 1
- %val1 = load i16, i16* %ptr1, align 1
+ %alloca = alloca [128 x i16], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i16], [128 x i16] addrspace(5)* %alloca, i32 0, i32 %offset
+ %val0 = load i16, i16 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i16, i16 addrspace(5)* %ptr0, i32 1
+ %val1 = load i16, i16 addrspace(5)* %ptr1, align 1
%add = add i16 %val0, %val1
store i16 %add, i16 addrspace(1)* %out
ret void
@@ -43,16 +42,16 @@ define amdgpu_kernel void @load_unknown_offset_align1_i16(i16 addrspace(1)* noal
; ALL-LABEL: @load_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 1
-; UNALIGNED: load <2 x i32>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: load <2 x i32>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: load i32, i32* %ptr0, align 1
-; ALIGNED: load i32, i32* %ptr1, align 1
+; ALIGNED: load i32, i32 addrspace(5)* %ptr0, align 1
+; ALIGNED: load i32, i32 addrspace(5)* %ptr1, align 1
define amdgpu_kernel void @load_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i32], align 1
- %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
- %val0 = load i32, i32* %ptr0, align 1
- %ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
- %val1 = load i32, i32* %ptr1, align 1
+ %alloca = alloca [128 x i32], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca, i32 0, i32 %offset
+ %val0 = load i32, i32 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %ptr0, i32 1
+ %val1 = load i32, i32 addrspace(5)* %ptr1, align 1
%add = add i32 %val0, %val1
store i32 %add, i32 addrspace(1)* %out
ret void
@@ -63,17 +62,17 @@ define amdgpu_kernel void @load_unknown_offset_align1_i32(i32 addrspace(1)* noal
; ALL-LABEL: @load_alloca16_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 16
-; UNALIGNED: load <2 x i32>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: load <2 x i32>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
; FIXME: Should change alignment
; ALIGNED: load i32
; ALIGNED: load i32
define amdgpu_kernel void @load_alloca16_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i32], align 16
- %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
- %val0 = load i32, i32* %ptr0, align 1
- %ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
- %val1 = load i32, i32* %ptr1, align 1
+ %alloca = alloca [128 x i32], align 16, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca, i32 0, i32 %offset
+ %val0 = load i32, i32 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %ptr0, i32 1
+ %val1 = load i32, i32 addrspace(5)* %ptr1, align 1
%add = add i32 %val0, %val1
store i32 %add, i32 addrspace(1)* %out
ret void
@@ -81,31 +80,31 @@ define amdgpu_kernel void @load_alloca16_unknown_offset_align1_i32(i32 addrspace
; ALL-LABEL: @store_unknown_offset_align1_i8(
; ALL: alloca [128 x i8], align 1
-; UNALIGNED: store <2 x i8> <i8 9, i8 10>, <2 x i8>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: store <2 x i8> <i8 9, i8 10>, <2 x i8> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: store i8 9, i8* %ptr0, align 1{{$}}
-; ALIGNED: store i8 10, i8* %ptr1, align 1{{$}}
+; ALIGNED: store i8 9, i8 addrspace(5)* %ptr0, align 1{{$}}
+; ALIGNED: store i8 10, i8 addrspace(5)* %ptr1, align 1{{$}}
define amdgpu_kernel void @store_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i8], align 1
- %ptr0 = getelementptr inbounds [128 x i8], [128 x i8]* %alloca, i32 0, i32 %offset
- store i8 9, i8* %ptr0, align 1
- %ptr1 = getelementptr inbounds i8, i8* %ptr0, i32 1
- store i8 10, i8* %ptr1, align 1
+ %alloca = alloca [128 x i8], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i8], [128 x i8] addrspace(5)* %alloca, i32 0, i32 %offset
+ store i8 9, i8 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i8, i8 addrspace(5)* %ptr0, i32 1
+ store i8 10, i8 addrspace(5)* %ptr1, align 1
ret void
}

; ALL-LABEL: @store_unknown_offset_align1_i16(
; ALL: alloca [128 x i16], align 1
-; UNALIGNED: store <2 x i16> <i16 9, i16 10>, <2 x i16>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: store <2 x i16> <i16 9, i16 10>, <2 x i16> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: store i16 9, i16* %ptr0, align 1{{$}}
-; ALIGNED: store i16 10, i16* %ptr1, align 1{{$}}
+; ALIGNED: store i16 9, i16 addrspace(5)* %ptr0, align 1{{$}}
+; ALIGNED: store i16 10, i16 addrspace(5)* %ptr1, align 1{{$}}
define amdgpu_kernel void @store_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i16], align 1
- %ptr0 = getelementptr inbounds [128 x i16], [128 x i16]* %alloca, i32 0, i32 %offset
- store i16 9, i16* %ptr0, align 1
- %ptr1 = getelementptr inbounds i16, i16* %ptr0, i32 1
- store i16 10, i16* %ptr1, align 1
+ %alloca = alloca [128 x i16], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i16], [128 x i16] addrspace(5)* %alloca, i32 0, i32 %offset
+ store i16 9, i16 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i16, i16 addrspace(5)* %ptr0, i32 1
+ store i16 10, i16 addrspace(5)* %ptr1, align 1
ret void
}
@@ -115,16 +114,16 @@ define amdgpu_kernel void @store_unknown_offset_align1_i16(i16 addrspace(1)* noa
; ALL-LABEL: @store_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 1
-; UNALIGNED: store <2 x i32> <i32 9, i32 10>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: store <2 x i32> <i32 9, i32 10>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: store i32 9, i32* %ptr0, align 1
-; ALIGNED: store i32 10, i32* %ptr1, align 1
+; ALIGNED: store i32 9, i32 addrspace(5)* %ptr0, align 1
+; ALIGNED: store i32 10, i32 addrspace(5)* %ptr1, align 1
define amdgpu_kernel void @store_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i32], align 1
- %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
- store i32 9, i32* %ptr0, align 1
- %ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
- store i32 10, i32* %ptr1, align 1
+ %alloca = alloca [128 x i32], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca, i32 0, i32 %offset
+ store i32 9, i32 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %ptr0, i32 1
+ store i32 10, i32 addrspace(5)* %ptr1, align 1
ret void
}

diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
index 0fcdc7b9083..43352783d10 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
@@ -5,7 +5,6 @@
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-8,+unaligned-scratch-access -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=ELT8,ELT8-UNALIGNED,UNALIGNED,ALL %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-16,+unaligned-scratch-access -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=ELT16,ELT16-UNALIGNED,UNALIGNED,ALL %s
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32
; ALIGNED: store i32
@@ -17,52 +16,52 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:
; ELT8-UNALIGNED: store <2 x i32>
; ELT16-UNALIGNED: store <4 x i32>
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
- %out.gep.3 = getelementptr i32, i32* %out, i32 3
-
- store i32 9, i32* %out
- store i32 1, i32* %out.gep.1
- store i32 23, i32* %out.gep.2
- store i32 19, i32* %out.gep.3
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(5)* %out, i32 3
+
+ store i32 9, i32 addrspace(5)* %out
+ store i32 1, i32 addrspace(5)* %out.gep.1
+ store i32 23, i32 addrspace(5)* %out.gep.2
+ store i32 19, i32 addrspace(5)* %out.gep.3
ret void
}

; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32_align1(
-; ALIGNED: store i32 9, i32* %out, align 1
-; ALIGNED: store i32 1, i32* %out.gep.1, align 1
-; ALIGNED: store i32 23, i32* %out.gep.2, align 1
-; ALIGNED: store i32 19, i32* %out.gep.3, align 1
+; ALIGNED: store i32 9, i32 addrspace(5)* %out, align 1
+; ALIGNED: store i32 1, i32 addrspace(5)* %out.gep.1, align 1
+; ALIGNED: store i32 23, i32 addrspace(5)* %out.gep.2, align 1
+; ALIGNED: store i32 19, i32 addrspace(5)* %out.gep.3, align 1
-; ELT16-UNALIGNED: store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, <4 x i32>* %1, align 1
+; ELT16-UNALIGNED: store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, <4 x i32> addrspace(5)* %1, align 1
-; ELT8-UNALIGNED: store <2 x i32> <i32 9, i32 1>, <2 x i32>* %1, align 1
-; ELT8-UNALIGNED: store <2 x i32> <i32 23, i32 19>, <2 x i32>* %2, align 1
+; ELT8-UNALIGNED: store <2 x i32> <i32 9, i32 1>, <2 x i32> addrspace(5)* %1, align 1
+; ELT8-UNALIGNED: store <2 x i32> <i32 23, i32 19>, <2 x i32> addrspace(5)* %2, align 1
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align1(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
- %out.gep.3 = getelementptr i32, i32* %out, i32 3
-
- store i32 9, i32* %out, align 1
- store i32 1, i32* %out.gep.1, align 1
- store i32 23, i32* %out.gep.2, align 1
- store i32 19, i32* %out.gep.3, align 1
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align1(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(5)* %out, i32 3
+
+ store i32 9, i32 addrspace(5)* %out, align 1
+ store i32 1, i32 addrspace(5)* %out.gep.1, align 1
+ store i32 23, i32 addrspace(5)* %out.gep.2, align 1
+ store i32 19, i32 addrspace(5)* %out.gep.3, align 1
ret void
}

; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32_align2(
-; ALIGNED: store i32 9, i32* %out, align 2
-; ALIGNED: store i32 1, i32* %out.gep.1, align 2
-; ALIGNED: store i32 23, i32* %out.gep.2, align 2
-; ALIGNED: store i32 19, i32* %out.gep.3, align 2
+; ALIGNED: store i32 9, i32 addrspace(5)* %out, align 2
+; ALIGNED: store i32 1, i32 addrspace(5)* %out.gep.1, align 2
+; ALIGNED: store i32 23, i32 addrspace(5)* %out.gep.2, align 2
+; ALIGNED: store i32 19, i32 addrspace(5)* %out.gep.3, align 2
-; ELT16-UNALIGNED: store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, <4 x i32>* %1, align 2
+; ELT16-UNALIGNED: store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, <4 x i32> addrspace(5)* %1, align 2
; ELT8-UNALIGNED: store <2 x i32>
; ELT8-UNALIGNED: store <2 x i32>
@@ -71,29 +70,29 @@ define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align1(
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align2(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
- %out.gep.3 = getelementptr i32, i32* %out, i32 3
-
- store i32 9, i32* %out, align 2
- store i32 1, i32* %out.gep.1, align 2
- store i32 23, i32* %out.gep.2, align 2
- store i32 19, i32* %out.gep.3, align 2
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align2(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(5)* %out, i32 3
+
+ store i32 9, i32 addrspace(5)* %out, align 2
+ store i32 1, i32 addrspace(5)* %out.gep.1, align 2
+ store i32 23, i32 addrspace(5)* %out.gep.2, align 2
+ store i32 19, i32 addrspace(5)* %out.gep.3, align 2
ret void
}

; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i8(
; ALL: store <4 x i8>
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8(i8* %out) #0 {
- %out.gep.1 = getelementptr i8, i8* %out, i32 1
- %out.gep.2 = getelementptr i8, i8* %out, i32 2
- %out.gep.3 = getelementptr i8, i8* %out, i32 3
-
- store i8 9, i8* %out, align 4
- store i8 1, i8* %out.gep.1
- store i8 23, i8* %out.gep.2
- store i8 19, i8* %out.gep.3
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8(i8 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i8, i8 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i8, i8 addrspace(5)* %out, i32 3
+
+ store i8 9, i8 addrspace(5)* %out, align 4
+ store i8 1, i8 addrspace(5)* %out.gep.1
+ store i8 23, i8 addrspace(5)* %out.gep.2
+ store i8 19, i8 addrspace(5)* %out.gep.3
ret void
}
@@ -103,26 +102,26 @@ define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8(i8* %out
; ALIGNED: store i8
; ALIGNED: store i8
-; UNALIGNED: store <4 x i8> <i8 9, i8 1, i8 23, i8 19>, <4 x i8>* %1, align 1
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8_align1(i8* %out) #0 {
- %out.gep.1 = getelementptr i8, i8* %out, i32 1
- %out.gep.2 = getelementptr i8, i8* %out, i32 2
- %out.gep.3 = getelementptr i8, i8* %out, i32 3
+; UNALIGNED: store <4 x i8> <i8 9, i8 1, i8 23, i8 19>, <4 x i8> addrspace(5)* %1, align 1
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8_align1(i8 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i8, i8 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i8, i8 addrspace(5)* %out, i32 3
- store i8 9, i8* %out, align 1
- store i8 1, i8* %out.gep.1, align 1
- store i8 23, i8* %out.gep.2, align 1
- store i8 19, i8* %out.gep.3, align 1
+ store i8 9, i8 addrspace(5)* %out, align 1
+ store i8 1, i8 addrspace(5)* %out.gep.1, align 1
+ store i8 23, i8 addrspace(5)* %out.gep.2, align 1
+ store i8 19, i8 addrspace(5)* %out.gep.3, align 1
ret void
}

; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v2i16(
; ALL: store <2 x i16>
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16(i16* %out) #0 {
- %out.gep.1 = getelementptr i16, i16* %out, i32 1
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16(i16 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(5)* %out, i32 1
- store i16 9, i16* %out, align 4
- store i16 12, i16* %out.gep.1
+ store i16 9, i16 addrspace(5)* %out, align 4
+ store i16 12, i16 addrspace(5)* %out.gep.1
ret void
}
@@ -130,12 +129,12 @@ define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16(i16* %o
; ALIGNED: store i16
; ALIGNED: store i16
-; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 2
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align2(i16* %out) #0 {
- %out.gep.1 = getelementptr i16, i16* %out, i32 1
+; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16> addrspace(5)* %1, align 2
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align2(i16 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(5)* %out, i32 1
- store i16 9, i16* %out, align 2
- store i16 12, i16* %out.gep.1, align 2
+ store i16 9, i16 addrspace(5)* %out, align 2
+ store i16 12, i16 addrspace(5)* %out.gep.1, align 2
ret void
}
@@ -143,22 +142,22 @@ define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align2(
; ALIGNED: store i16
; ALIGNED: store i16
-; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 1
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align1(i16* %out) #0 {
- %out.gep.1 = getelementptr i16, i16* %out, i32 1
+; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16> addrspace(5)* %1, align 1
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align1(i16 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(5)* %out, i32 1
- store i16 9, i16* %out, align 1
- store i16 12, i16* %out.gep.1, align 1
+ store i16 9, i16 addrspace(5)* %out, align 1
+ store i16 12, i16 addrspace(5)* %out.gep.1, align 1
ret void
}

; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v2i16_align8(
-; ALL: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 8
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align8(i16* %out) #0 {
- %out.gep.1 = getelementptr i16, i16* %out, i32 1
+; ALL: store <2 x i16> <i16 9, i16 12>, <2 x i16> addrspace(5)* %1, align 8
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align8(i16 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(5)* %out, i32 1
- store i16 9, i16* %out, align 8
- store i16 12, i16* %out.gep.1, align 2
+ store i16 9, i16 addrspace(5)* %out, align 8
+ store i16 12, i16 addrspace(5)* %out.gep.1, align 2
ret void
}
@@ -179,13 +178,13 @@ define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align8(
; ELT16-ALIGNED: store i32
; ELT16-UNALIGNED: store <3 x i32>
-define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
+define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
- store i32 9, i32* %out
- store i32 1, i32* %out.gep.1
- store i32 23, i32* %out.gep.2
+ store i32 9, i32 addrspace(5)* %out
+ store i32 1, i32 addrspace(5)* %out.gep.1
+ store i32 23, i32 addrspace(5)* %out.gep.2
ret void
}
@@ -202,13 +201,13 @@ define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32(i32* %o
; ELT8-UNALIGNED: store i32
; ELT16-UNALIGNED: store <3 x i32>
-define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32_align1(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
+define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32_align1(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
- store i32 9, i32* %out, align 1
- store i32 1, i32* %out.gep.1, align 1
- store i32 23, i32* %out.gep.2, align 1
+ store i32 9, i32 addrspace(5)* %out, align 1
+ store i32 1, i32 addrspace(5)* %out.gep.1, align 1
+ store i32 23, i32 addrspace(5)* %out.gep.2, align 1
ret void
}
@@ -218,13 +217,13 @@ define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32_align1(
; ALIGNED: store i8
; UNALIGNED: store <3 x i8>
-define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i8_align1(i8* %out) #0 {
- %out.gep.1 = getelementptr i8, i8* %out, i8 1
- %out.gep.2 = getelementptr i8, i8* %out, i8 2
+define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i8_align1(i8 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(5)* %out, i8 1
+ %out.gep.2 = getelementptr i8, i8 addrspace(5)* %out, i8 2
- store i8 9, i8* %out, align 1
- store i8 1, i8* %out.gep.1, align 1
- store i8 23, i8* %out.gep.2, align 1
+ store i8 9, i8 addrspace(5)* %out, align 1
+ store i8 1, i8 addrspace(5)* %out.gep.1, align 1
+ store i8 23, i8 addrspace(5)* %out.gep.2, align 1
ret void
}

diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll
index 02c3c05e794..d558aa24304 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll
@@ -1,6 +1,5 @@
; RUN: llc < %s | FileCheck %s
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"
; We need to compile this for a target where we have different address spaces,
@@ -21,9 +20,9 @@ entry:
loop:
%idx0 = phi i32 [ %next_idx0, %loop ], [ 0, %entry ]
- %0 = getelementptr inbounds i32, i32* null, i32 %idx0
+ %0 = getelementptr inbounds i32, i32 addrspace(5)* null, i32 %idx0
%1 = getelementptr inbounds i32, i32 addrspace(1)* null, i32 %idx0
- store i32 1, i32* %0
+ store i32 1, i32 addrspace(5)* %0
store i32 7, i32 addrspace(1)* %1
%next_idx0 = add nuw nsw i32 %idx0, 1
br label %loop

diff --git a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
index e986c3dc2a2..362001fff92 100644
--- a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
+++ b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S -amdgpu-unroll-threshold-private=20000 %s | FileCheck %s
+; RUN: opt -data-layout=A5 -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S -amdgpu-unroll-threshold-private=20000 %s | FileCheck %s
; Check that we full unroll loop to be able to eliminate alloca
; CHECK-LABEL: @non_invariant_ind
@@ -9,13 +9,13 @@
define amdgpu_kernel void @non_invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) {
entry:
- %arr = alloca [64 x i32], align 4
+ %arr = alloca [64 x i32], align 4, addrspace(5)
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.body
for.cond.cleanup: ; preds = %for.body
- %arrayidx5 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %x
- %tmp15 = load i32, i32* %arrayidx5, align 4
+ %arrayidx5 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %x
+ %tmp15 = load i32, i32 addrspace(5)* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
ret void
@@ -27,8 +27,8 @@ for.body: ; preds = %for.body, %entry
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
%add = add nsw i32 %i.015, %tmp1
%rem = srem i32 %add, 64
- %arrayidx3 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %rem
- store i32 %tmp16, i32* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %rem
+ store i32 %tmp16, i32 addrspace(5)* %arrayidx3, align 4
%inc = add nuw nsw i32 %i.015, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -42,7 +42,7 @@ for.body: ; preds = %for.body, %entry
define amdgpu_kernel void @invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) {
entry:
- %arr = alloca [64 x i32], align 4
+ %arr = alloca [64 x i32], align 4, addrspace(5)
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.cond2.preheader
@@ -54,8 +54,8 @@ for.cond2.preheader: ; preds = %for.cond.cleanup5,
br label %for.body6
for.cond.cleanup: ; preds = %for.cond.cleanup5
- %arrayidx13 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %x
- %tmp16 = load i32, i32* %arrayidx13, align 4
+ %arrayidx13 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %x
+ %tmp16 = load i32, i32 addrspace(5)* %arrayidx13, align 4
%arrayidx15 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp16, i32 addrspace(1)* %arrayidx15, align 4
ret void
@@ -69,8 +69,8 @@ for.body6: ; preds = %for.body6, %for.con
%j.025 = phi i32 [ 0, %for.cond2.preheader ], [ %inc, %for.body6 ]
%add = add nsw i32 %j.025, %tmp1
%rem = srem i32 %add, 64
- %arrayidx8 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %rem
- store i32 %tmp15, i32* %arrayidx8, align 4
+ %arrayidx8 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %rem
+ store i32 %tmp15, i32 addrspace(5)* %arrayidx8, align 4
%inc = add nuw nsw i32 %j.025, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup5, label %for.body6
@@ -84,13 +84,13 @@ for.body6: ; preds = %for.body6, %for.con
define amdgpu_kernel void @too_big(i32 addrspace(1)* nocapture %a, i32 %x) {
entry:
- %arr = alloca [256 x i32], align 4
+ %arr = alloca [256 x i32], align 4, addrspace(5)
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.body
for.cond.cleanup: ; preds = %for.body
- %arrayidx5 = getelementptr inbounds [256 x i32], [256 x i32]* %arr, i32 0, i32 %x
- %tmp15 = load i32, i32* %arrayidx5, align 4
+ %arrayidx5 = getelementptr inbounds [256 x i32], [256 x i32] addrspace(5)* %arr, i32 0, i32 %x
+ %tmp15 = load i32, i32 addrspace(5)* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
ret void
@@ -102,8 +102,8 @@ for.body: ; preds = %for.body, %entry
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
%add = add nsw i32 %i.015, %tmp1
%rem = srem i32 %add, 64
- %arrayidx3 = getelementptr inbounds [256 x i32], [256 x i32]* %arr, i32 0, i32 %rem
- store i32 %tmp16, i32* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds [256 x i32], [256 x i32] addrspace(5)* %arr, i32 0, i32 %rem
+ store i32 %tmp16, i32 addrspace(5)* %arrayidx3, align 4
%inc = add nuw nsw i32 %i.015, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -118,13 +118,13 @@ for.body: ; preds = %for.body, %entry
define amdgpu_kernel void @dynamic_size_alloca(i32 addrspace(1)* nocapture %a, i32 %n, i32 %x) {
entry:
- %arr = alloca i32, i32 %n, align 4
+ %arr = alloca i32, i32 %n, align 4, addrspace(5)
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.body
for.cond.cleanup: ; preds = %for.body
- %arrayidx5 = getelementptr inbounds i32, i32* %arr, i32 %x
- %tmp15 = load i32, i32* %arrayidx5, align 4
+ %arrayidx5 = getelementptr inbounds i32, i32 addrspace(5)* %arr, i32 %x
+ %tmp15 = load i32, i32 addrspace(5)* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
ret void
@@ -136,8 +136,8 @@ for.body: ; preds = %for.body, %entry
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
%add = add nsw i32 %i.015, %tmp1
%rem = srem i32 %add, 64
- %arrayidx3 = getelementptr inbounds i32, i32* %arr, i32 %rem
- store i32 %tmp16, i32* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32 addrspace(5)* %arr, i32 %rem
+ store i32 %tmp16, i32 addrspace(5)* %arrayidx3, align 4
%inc = add nuw nsw i32 %i.015, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup, label %for.body