Diffstat (limited to 'llvm/test/Transforms')
-rw-r--r--  llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll              |  42
-rw-r--r--  llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll                        | 132
-rw-r--r--  llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll                         | 118
-rw-r--r--  llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll          |  82
-rw-r--r--  llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll          |  30
-rw-r--r--  llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll          |  44
-rw-r--r--  llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll                   | 100
-rw-r--r--  llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll               |  96
-rw-r--r--  llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll         |  46
-rw-r--r--  llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll                       | 210
-rw-r--r--  llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll                     | 128
-rw-r--r--  llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll     | 115
-rw-r--r--  llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll        | 189
-rw-r--r--  llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll    |   5
-rw-r--r--  llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll                   |  42
15 files changed, 688 insertions, 691 deletions
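The hunks below consistently apply AMDGPU's updated address-space numbering: the flat address space moves from addrspace(4) to the unqualified addrspace(0), and the private (scratch) address space moves from the unqualified addrspace(0) to addrspace(5). A minimal before/after sketch of the pattern repeated throughout (operand names %lds.ptr and %priv.ptr are illustrative, not taken from any one test):

  ; old numbering: flat was addrspace(4), private was the unqualified addrspace(0)
  %flat      = addrspacecast i64 addrspace(3)* %lds.ptr to i64 addrspace(4)*
  %priv.cast = addrspacecast i64* %priv.ptr to i64 addrspace(4)*

  ; new numbering: flat is the unqualified addrspace(0), private is addrspace(5)
  %flat      = addrspacecast i64 addrspace(3)* %lds.ptr to i64*
  %priv.cast = addrspacecast i64 addrspace(5)* %priv.ptr to i64*
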
diff --git a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll
index adeba26a6d4..e21392f7fc3 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll
@@ -5,7 +5,7 @@
; CHECK: br
; CHECK-NOT: addrspacecast
define i64 @no_sink_local_to_flat(i1 %pred, i64 addrspace(3)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(3)* %ptr to i64 addrspace(4)*
+ %ptr_cast = addrspacecast i64 addrspace(3)* %ptr to i64*
br i1 %pred, label %l1, label %l2
l1:
@@ -13,7 +13,7 @@ l1:
ret i64 %v1
l2:
- %v2 = load i64, i64 addrspace(4)* %ptr_cast
+ %v2 = load i64, i64* %ptr_cast
ret i64 %v2
}
@@ -21,16 +21,16 @@ l2:
; CHECK: addrspacecast
; CHECK: br
; CHECK-NOT: addrspacecast
-define i64 @no_sink_private_to_flat(i1 %pred, i64* %ptr) {
- %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(4)*
+define i64 @no_sink_private_to_flat(i1 %pred, i64 addrspace(5)* %ptr) {
+ %ptr_cast = addrspacecast i64 addrspace(5)* %ptr to i64*
br i1 %pred, label %l1, label %l2
l1:
- %v1 = load i64, i64* %ptr
+ %v1 = load i64, i64 addrspace(5)* %ptr
ret i64 %v1
l2:
- %v2 = load i64, i64 addrspace(4)* %ptr_cast
+ %v2 = load i64, i64* %ptr_cast
ret i64 %v2
}
@@ -40,7 +40,7 @@ l2:
; CHECK: br
; CHECK: addrspacecast
define i64 @sink_global_to_flat(i1 %pred, i64 addrspace(1)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(1)* %ptr to i64 addrspace(4)*
+ %ptr_cast = addrspacecast i64 addrspace(1)* %ptr to i64*
br i1 %pred, label %l1, label %l2
l1:
@@ -48,7 +48,7 @@ l1:
ret i64 %v1
l2:
- %v2 = load i64, i64 addrspace(4)* %ptr_cast
+ %v2 = load i64, i64* %ptr_cast
ret i64 %v2
}
@@ -56,12 +56,12 @@ l2:
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
-define i64 @sink_flat_to_global(i1 %pred, i64 addrspace(4)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(1)*
+define i64 @sink_flat_to_global(i1 %pred, i64* %ptr) {
+ %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(1)*
br i1 %pred, label %l1, label %l2
l1:
- %v1 = load i64, i64 addrspace(4)* %ptr
+ %v1 = load i64, i64* %ptr
ret i64 %v1
l2:
@@ -73,12 +73,12 @@ l2:
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
-define i64 @sink_flat_to_constant(i1 %pred, i64 addrspace(4)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(2)*
+define i64 @sink_flat_to_constant(i1 %pred, i64* %ptr) {
+ %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(2)*
br i1 %pred, label %l1, label %l2
l1:
- %v1 = load i64, i64 addrspace(4)* %ptr
+ %v1 = load i64, i64* %ptr
ret i64 %v1
l2:
@@ -90,12 +90,12 @@ l2:
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
-define i64 @sink_flat_to_local(i1 %pred, i64 addrspace(4)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64 addrspace(3)*
+define i64 @sink_flat_to_local(i1 %pred, i64* %ptr) {
+ %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(3)*
br i1 %pred, label %l1, label %l2
l1:
- %v1 = load i64, i64 addrspace(4)* %ptr
+ %v1 = load i64, i64* %ptr
ret i64 %v1
l2:
@@ -107,15 +107,15 @@ l2:
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
-define i64 @sink_flat_to_private(i1 %pred, i64 addrspace(4)* %ptr) {
- %ptr_cast = addrspacecast i64 addrspace(4)* %ptr to i64*
+define i64 @sink_flat_to_private(i1 %pred, i64* %ptr) {
+ %ptr_cast = addrspacecast i64* %ptr to i64 addrspace(5)*
br i1 %pred, label %l1, label %l2
l1:
- %v1 = load i64, i64 addrspace(4)* %ptr
+ %v1 = load i64, i64* %ptr
ret i64 %v1
l2:
- %v2 = load i64, i64* %ptr_cast
+ %v2 = load i64, i64 addrspace(5)* %ptr_cast
ret i64 %v2
}
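
For context before the InferAddressSpaces tests that follow: the pass rewrites a flat access that is only reachable through an addrspacecast from a specific address space so that it uses that space directly. A minimal sketch of the rewrite the updated CHECK lines expect (operand names are illustrative):

  ; before InferAddressSpaces
  %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
  %val  = load i32, i32* %tmp0, align 4

  ; after InferAddressSpaces
  %val = load i32, i32 addrspace(1)* %input, align 4
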
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
index 1eab7075403..f70c36ac7f7 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
@@ -3,69 +3,69 @@
; Trivial optimization of generic addressing
; CHECK-LABEL: @load_global_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)*
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(1)* %tmp0
; CHECK-NEXT: ret float %tmp1
-define float @load_global_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)*
+define float @load_global_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
%tmp1 = load float, float addrspace(1)* %tmp0
ret float %tmp1
}
; CHECK-LABEL: @load_constant_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(2)*
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(2)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(2)* %tmp0
; CHECK-NEXT: ret float %tmp1
-define float @load_constant_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(2)*
+define float @load_constant_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(2)*
%tmp1 = load float, float addrspace(2)* %tmp0
ret float %tmp1
}
; CHECK-LABEL: @load_group_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)*
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(3)* %tmp0
; CHECK-NEXT: ret float %tmp1
-define float @load_group_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)*
+define float @load_group_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
%tmp1 = load float, float addrspace(3)* %tmp0
ret float %tmp1
}
; CHECK-LABEL: @load_private_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float*
-; CHECK-NEXT: %tmp1 = load float, float* %tmp0
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
+; CHECK-NEXT: %tmp1 = load float, float addrspace(5)* %tmp0
; CHECK-NEXT: ret float %tmp1
-define float @load_private_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float*
- %tmp1 = load float, float* %tmp0
+define float @load_private_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
+ %tmp1 = load float, float addrspace(5)* %tmp0
ret float %tmp1
}
; CHECK-LABEL: @store_global_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)*
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* %tmp0
-define amdgpu_kernel void @store_global_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(1)*
+define amdgpu_kernel void @store_global_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
store float 0.0, float addrspace(1)* %tmp0
ret void
}
; CHECK-LABEL: @store_group_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)*
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(3)* %tmp0
-define amdgpu_kernel void @store_group_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float addrspace(3)*
+define amdgpu_kernel void @store_group_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
store float 0.0, float addrspace(3)* %tmp0
ret void
}
; CHECK-LABEL: @store_private_from_flat(
-; CHECK-NEXT: %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float*
-; CHECK-NEXT: store float 0.000000e+00, float* %tmp0
-define amdgpu_kernel void @store_private_from_flat(float addrspace(4)* %generic_scalar) #0 {
- %tmp0 = addrspacecast float addrspace(4)* %generic_scalar to float*
- store float 0.0, float* %tmp0
+; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
+; CHECK-NEXT: store float 0.000000e+00, float addrspace(5)* %tmp0
+define amdgpu_kernel void @store_private_from_flat(float* %generic_scalar) #0 {
+ %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
+ store float 0.0, float addrspace(5)* %tmp0
ret void
}
@@ -75,10 +75,10 @@ define amdgpu_kernel void @store_private_from_flat(float addrspace(4)* %generic_
; CHECK-NEXT: store i32 %val, i32 addrspace(1)* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
@@ -88,95 +88,95 @@ define amdgpu_kernel void @load_store_global(i32 addrspace(1)* nocapture %input,
; CHECK-NEXT: store i32 %val, i32 addrspace(3)* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; Optimized to private load/store.
; CHECK-LABEL: @load_store_private(
-; CHECK-NEXT: %val = load i32, i32* %input, align 4
-; CHECK-NEXT: store i32 %val, i32* %output, align 4
+; CHECK-NEXT: %val = load i32, i32 addrspace(5)* %input, align 4
+; CHECK-NEXT: store i32 %val, i32 addrspace(5)* %output, align 4
; CHECK-NEXT: ret void
-define amdgpu_kernel void @load_store_private(i32* nocapture %input, i32* nocapture %output) #0 {
- %tmp0 = addrspacecast i32* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+define amdgpu_kernel void @load_store_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
+ %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; No optimization. flat load/store.
; CHECK-LABEL: @load_store_flat(
-; CHECK-NEXT: %val = load i32, i32 addrspace(4)* %input, align 4
-; CHECK-NEXT: store i32 %val, i32 addrspace(4)* %output, align 4
+; CHECK-NEXT: %val = load i32, i32* %input, align 4
+; CHECK-NEXT: store i32 %val, i32* %output, align 4
; CHECK-NEXT: ret void
-define amdgpu_kernel void @load_store_flat(i32 addrspace(4)* nocapture %input, i32 addrspace(4)* nocapture %output) #0 {
- %val = load i32, i32 addrspace(4)* %input, align 4
- store i32 %val, i32 addrspace(4)* %output, align 4
+define amdgpu_kernel void @load_store_flat(i32* nocapture %input, i32* nocapture %output) #0 {
+ %val = load i32, i32* %input, align 4
+ store i32 %val, i32* %output, align 4
ret void
}
; CHECK-LABEL: @store_addrspacecast_ptr_value(
-; CHECK: %cast = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
-; CHECK-NEXT: store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %output, align 4
-define amdgpu_kernel void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32 addrspace(4)* addrspace(1)* nocapture %output) #0 {
- %cast = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
- store i32 addrspace(4)* %cast, i32 addrspace(4)* addrspace(1)* %output, align 4
+; CHECK: %cast = addrspacecast i32 addrspace(1)* %input to i32*
+; CHECK-NEXT: store i32* %cast, i32* addrspace(1)* %output, align 4
+define amdgpu_kernel void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32* addrspace(1)* nocapture %output) #0 {
+ %cast = addrspacecast i32 addrspace(1)* %input to i32*
+ store i32* %cast, i32* addrspace(1)* %output, align 4
ret void
}
; CHECK-LABEL: @atomicrmw_add_global_to_flat(
; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(1)* %global.ptr, i32 %y seq_cst
define i32 @atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = atomicrmw add i32 addrspace(4)* %cast, i32 %y seq_cst
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = atomicrmw add i32* %cast, i32 %y seq_cst
ret i32 %ret
}
; CHECK-LABEL: @atomicrmw_add_group_to_flat(
; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(3)* %group.ptr, i32 %y seq_cst
define i32 @atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = atomicrmw add i32 addrspace(4)* %cast, i32 %y seq_cst
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = atomicrmw add i32* %cast, i32 %y seq_cst
ret i32 %ret
}
; CHECK-LABEL: @cmpxchg_global_to_flat(
; CHECK: %ret = cmpxchg i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val seq_cst monotonic
define { i32, i1 } @cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = cmpxchg i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = cmpxchg i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
ret { i32, i1 } %ret
}
; CHECK-LABEL: @cmpxchg_group_to_flat(
; CHECK: %ret = cmpxchg i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val seq_cst monotonic
define { i32, i1 } @cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = cmpxchg i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = cmpxchg i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
ret { i32, i1 } %ret
}
; Not pointer operand
; CHECK-LABEL: @cmpxchg_group_to_flat_wrong_operand(
-; CHECK: %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32 addrspace(4)*
-; CHECK: %ret = cmpxchg i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(4)* %cast.cmp, i32 addrspace(4)* %val seq_cst monotonic
-define { i32 addrspace(4)*, i1 } @cmpxchg_group_to_flat_wrong_operand(i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(3)* %cmp.ptr, i32 addrspace(4)* %val) #0 {
- %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32 addrspace(4)*
- %ret = cmpxchg i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(4)* %cast.cmp, i32 addrspace(4)* %val seq_cst monotonic
- ret { i32 addrspace(4)*, i1 } %ret
+; CHECK: %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32*
+; CHECK: %ret = cmpxchg i32* addrspace(3)* %cas.ptr, i32* %cast.cmp, i32* %val seq_cst monotonic
+define { i32*, i1 } @cmpxchg_group_to_flat_wrong_operand(i32* addrspace(3)* %cas.ptr, i32 addrspace(3)* %cmp.ptr, i32* %val) #0 {
+ %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32*
+ %ret = cmpxchg i32* addrspace(3)* %cas.ptr, i32* %cast.cmp, i32* %val seq_cst monotonic
+ ret { i32*, i1 } %ret
}
; Null pointer in local addr space
; CHECK-LABEL: @local_nullptr
-; CHECK: icmp ne i8 addrspace(3)* %a, addrspacecast (i8* null to i8 addrspace(3)*)
+; CHECK: icmp ne i8 addrspace(3)* %a, addrspacecast (i8 addrspace(5)* null to i8 addrspace(3)*)
; CHECK-NOT: i8 addrspace(3)* null
define void @local_nullptr(i32 addrspace(1)* nocapture %results, i8 addrspace(3)* %a) {
entry:
- %tobool = icmp ne i8 addrspace(3)* %a, addrspacecast (i8* null to i8 addrspace(3)*)
+ %tobool = icmp ne i8 addrspace(3)* %a, addrspacecast (i8 addrspace(5)* null to i8 addrspace(3)*)
%conv = zext i1 %tobool to i32
store i32 %conv, i32 addrspace(1)* %results, align 4
ret void
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
index b185ede2657..0a5e7a513e0 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/icmp.ll
@@ -3,57 +3,57 @@
; CHECK-LABEL: @icmp_flat_cmp_self(
; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, %group.ptr.0
define i1 @icmp_flat_cmp_self(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, %cast0
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, %cast0
ret i1 %cmp
}
; CHECK-LABEL: @icmp_flat_flat_from_group(
; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, %group.ptr.1
define i1 @icmp_flat_flat_from_group(i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, %cast1
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %cmp = icmp eq i32* %cast0, %cast1
ret i1 %cmp
}
; CHECK-LABEL: @icmp_mismatch_flat_from_group_private(
-; CHECK: %1 = addrspacecast i32* %private.ptr.0 to i32 addrspace(4)*
-; CHECK: %2 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* %1, %2
-define i1 @icmp_mismatch_flat_from_group_private(i32* %private.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast0 = addrspacecast i32* %private.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, %cast1
+; CHECK: %1 = addrspacecast i32 addrspace(5)* %private.ptr.0 to i32*
+; CHECK: %2 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+; CHECK: %cmp = icmp eq i32* %1, %2
+define i1 @icmp_mismatch_flat_from_group_private(i32 addrspace(5)* %private.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
+ %cast0 = addrspacecast i32 addrspace(5)* %private.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %cmp = icmp eq i32* %cast0, %cast1
ret i1 %cmp
}
; CHECK-LABEL: @icmp_flat_group_flat(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* %1, %flat.ptr.1
-define i1 @icmp_flat_group_flat(i32 addrspace(3)* %group.ptr.0, i32 addrspace(4)* %flat.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, %flat.ptr.1
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cmp = icmp eq i32* %1, %flat.ptr.1
+define i1 @icmp_flat_group_flat(i32 addrspace(3)* %group.ptr.0, i32* %flat.ptr.1) #0 {
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, %flat.ptr.1
ret i1 %cmp
}
; CHECK-LABEL: @icmp_flat_flat_group(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* %flat.ptr.0, %1
-define i1 @icmp_flat_flat_group(i32 addrspace(4)* %flat.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %flat.ptr.0, %cast1
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+; CHECK: %cmp = icmp eq i32* %flat.ptr.0, %1
+define i1 @icmp_flat_flat_group(i32* %flat.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %cmp = icmp eq i32* %flat.ptr.0, %cast1
ret i1 %cmp
}
; Keeping as cmp addrspace(3)* is better
; CHECK-LABEL: @icmp_flat_to_group_cmp(
-; CHECK: %cast0 = addrspacecast i32 addrspace(4)* %flat.ptr.0 to i32 addrspace(3)*
-; CHECK: %cast1 = addrspacecast i32 addrspace(4)* %flat.ptr.1 to i32 addrspace(3)*
+; CHECK: %cast0 = addrspacecast i32* %flat.ptr.0 to i32 addrspace(3)*
+; CHECK: %cast1 = addrspacecast i32* %flat.ptr.1 to i32 addrspace(3)*
; CHECK: %cmp = icmp eq i32 addrspace(3)* %cast0, %cast1
-define i1 @icmp_flat_to_group_cmp(i32 addrspace(4)* %flat.ptr.0, i32 addrspace(4)* %flat.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(4)* %flat.ptr.0 to i32 addrspace(3)*
- %cast1 = addrspacecast i32 addrspace(4)* %flat.ptr.1 to i32 addrspace(3)*
+define i1 @icmp_flat_to_group_cmp(i32* %flat.ptr.0, i32* %flat.ptr.1) #0 {
+ %cast0 = addrspacecast i32* %flat.ptr.0 to i32 addrspace(3)*
+ %cast1 = addrspacecast i32* %flat.ptr.1 to i32 addrspace(3)*
%cmp = icmp eq i32 addrspace(3)* %cast0, %cast1
ret i1 %cmp
}
@@ -62,35 +62,35 @@ define i1 @icmp_flat_to_group_cmp(i32 addrspace(4)* %flat.ptr.0, i32 addrspace(4
; constant cast if this is OK to change if 0 is a valid pointer.
; CHECK-LABEL: @icmp_group_flat_cmp_null(
-; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
+; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, addrspacecast (i32* null to i32 addrspace(3)*)
define i1 @icmp_group_flat_cmp_null(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, null
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, null
ret i1 %cmp
}
; CHECK-LABEL: @icmp_group_flat_cmp_constant_inttoptr(
-; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, addrspacecast (i32 addrspace(4)* inttoptr (i64 400 to i32 addrspace(4)*) to i32 addrspace(3)*)
+; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, addrspacecast (i32* inttoptr (i64 400 to i32*) to i32 addrspace(3)*)
define i1 @icmp_group_flat_cmp_constant_inttoptr(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, inttoptr (i64 400 to i32 addrspace(4)*)
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, inttoptr (i64 400 to i32*)
ret i1 %cmp
}
; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_null(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* %1, addrspacecast (i32* null to i32 addrspace(4)*)
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cmp = icmp eq i32* %1, addrspacecast (i32 addrspace(5)* null to i32*)
define i1 @icmp_mismatch_flat_group_private_cmp_null(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, addrspacecast (i32* null to i32 addrspace(4)*)
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, addrspacecast (i32 addrspace(5)* null to i32*)
ret i1 %cmp
}
; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_undef(
; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, undef
define i1 @icmp_mismatch_flat_group_private_cmp_undef(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, addrspacecast (i32* undef to i32 addrspace(4)*)
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, addrspacecast (i32 addrspace(5)* undef to i32*)
ret i1 %cmp
}
@@ -98,62 +98,62 @@ define i1 @icmp_mismatch_flat_group_private_cmp_undef(i32 addrspace(3)* %group.p
@global0 = internal addrspace(1) global i32 0, align 4
; CHECK-LABEL: @icmp_mismatch_flat_group_global_cmp_gv(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* %1, addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cmp = icmp eq i32* %1, addrspacecast (i32 addrspace(1)* @global0 to i32*)
define i1 @icmp_mismatch_flat_group_global_cmp_gv(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, addrspacecast (i32 addrspace(1)* @global0 to i32*)
ret i1 %cmp
}
; CHECK-LABEL: @icmp_mismatch_group_global_cmp_gv_gv(
-; CHECK: %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
+; CHECK: %cmp = icmp eq i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), addrspacecast (i32 addrspace(1)* @global0 to i32*)
define i1 @icmp_mismatch_group_global_cmp_gv_gv(i32 addrspace(3)* %group.ptr.0) #0 {
- %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
+ %cmp = icmp eq i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), addrspacecast (i32 addrspace(1)* @global0 to i32*)
ret i1 %cmp
}
; CHECK-LABEL: @icmp_group_flat_cmp_undef(
; CHECK: %cmp = icmp eq i32 addrspace(3)* %group.ptr.0, undef
define i1 @icmp_group_flat_cmp_undef(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* %cast0, undef
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* %cast0, undef
ret i1 %cmp
}
; Test non-canonical orders
; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_null_swap(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*), %1
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %cmp = icmp eq i32* addrspacecast (i32 addrspace(5)* null to i32*), %1
define i1 @icmp_mismatch_flat_group_private_cmp_null_swap(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32* null to i32 addrspace(4)*), %cast0
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* addrspacecast (i32 addrspace(5)* null to i32*), %cast0
ret i1 %cmp
}
; CHECK-LABEL: @icmp_group_flat_cmp_undef_swap(
; CHECK: %cmp = icmp eq i32 addrspace(3)* undef, %group.ptr.0
define i1 @icmp_group_flat_cmp_undef_swap(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* undef, %cast0
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* undef, %cast0
ret i1 %cmp
}
; CHECK-LABEL: @icmp_mismatch_flat_group_private_cmp_undef_swap(
; CHECK: %cmp = icmp eq i32 addrspace(3)* undef, %group.ptr.0
define i1 @icmp_mismatch_flat_group_private_cmp_undef_swap(i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cmp = icmp eq i32 addrspace(4)* addrspacecast (i32* undef to i32 addrspace(4)*), %cast0
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cmp = icmp eq i32* addrspacecast (i32 addrspace(5)* undef to i32*), %cast0
ret i1 %cmp
}
; TODO: Should be handled
; CHECK-LABEL: @icmp_flat_flat_from_group_vector(
-; CHECK: %cmp = icmp eq <2 x i32 addrspace(4)*> %cast0, %cast1
+; CHECK: %cmp = icmp eq <2 x i32*> %cast0, %cast1
define <2 x i1> @icmp_flat_flat_from_group_vector(<2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 {
- %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32 addrspace(4)*>
- %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32 addrspace(4)*>
- %cmp = icmp eq <2 x i32 addrspace(4)*> %cast0, %cast1
+ %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*>
+ %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*>
+ %cmp = icmp eq <2 x i32*> %cast0, %cast1
ret <2 x i1> %cmp
}
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll
index 52067cd37bb..3096d8144dc 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-address-space.ll
@@ -30,29 +30,29 @@
; CHECK: ret void
define amdgpu_kernel void @load_store_lds_f32(i32 %i, float %v) #0 {
bb:
- %tmp = load float, float addrspace(4)* addrspacecast (float addrspace(3)* @scalar to float addrspace(4)*), align 4
+ %tmp = load float, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
call void @use(float %tmp)
- store float %v, float addrspace(4)* addrspacecast (float addrspace(3)* @scalar to float addrspace(4)*), align 4
+ store float %v, float* addrspacecast (float addrspace(3)* @scalar to float*), align 4
call void @llvm.amdgcn.s.barrier()
- %tmp1 = addrspacecast float addrspace(3)* @scalar to float addrspace(4)*
- %tmp2 = load float, float addrspace(4)* %tmp1, align 4
+ %tmp1 = addrspacecast float addrspace(3)* @scalar to float*
+ %tmp2 = load float, float* %tmp1, align 4
call void @use(float %tmp2)
- store float %v, float addrspace(4)* %tmp1, align 4
+ store float %v, float* %tmp1, align 4
call void @llvm.amdgcn.s.barrier()
- %tmp3 = load float, float addrspace(4)* getelementptr inbounds ([10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i32 0, i32 5), align 4
+ %tmp3 = load float, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
call void @use(float %tmp3)
- store float %v, float addrspace(4)* getelementptr inbounds ([10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i32 0, i32 5), align 4
+ store float %v, float* getelementptr inbounds ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5), align 4
call void @llvm.amdgcn.s.barrier()
- %tmp4 = getelementptr inbounds [10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i32 0, i32 5
- %tmp5 = load float, float addrspace(4)* %tmp4, align 4
+ %tmp4 = getelementptr inbounds [10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i32 0, i32 5
+ %tmp5 = load float, float* %tmp4, align 4
call void @use(float %tmp5)
- store float %v, float addrspace(4)* %tmp4, align 4
+ store float %v, float* %tmp4, align 4
call void @llvm.amdgcn.s.barrier()
- %tmp6 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*
- %tmp7 = getelementptr inbounds [10 x float], [10 x float] addrspace(4)* %tmp6, i32 0, i32 %i
- %tmp8 = load float, float addrspace(4)* %tmp7, align 4
+ %tmp6 = addrspacecast [10 x float] addrspace(3)* @array to [10 x float]*
+ %tmp7 = getelementptr inbounds [10 x float], [10 x float]* %tmp6, i32 0, i32 %i
+ %tmp8 = load float, float* %tmp7, align 4
call void @use(float %tmp8)
- store float %v, float addrspace(4)* %tmp7, align 4
+ store float %v, float* %tmp7, align 4
call void @llvm.amdgcn.s.barrier()
ret void
}
@@ -61,7 +61,7 @@ bb:
; CHECK: %tmp = load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*), align 4
define i32 @constexpr_load_int_from_float_lds() #0 {
bb:
- %tmp = load i32, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*) to i32 addrspace(4)*), align 4
+ %tmp = load i32, i32* addrspacecast (i32 addrspace(3)* bitcast (float addrspace(3)* @scalar to i32 addrspace(3)*) to i32*), align 4
ret i32 %tmp
}
@@ -73,18 +73,18 @@ bb:
; CHECK: ret i32 %tmp4
define i32 @load_int_from_global_float(float addrspace(1)* %input, i32 %i, i32 %j) #0 {
bb:
- %tmp = addrspacecast float addrspace(1)* %input to float addrspace(4)*
- %tmp1 = getelementptr float, float addrspace(4)* %tmp, i32 %i
- %tmp2 = getelementptr float, float addrspace(4)* %tmp1, i32 %j
- %tmp3 = bitcast float addrspace(4)* %tmp2 to i32 addrspace(4)*
- %tmp4 = load i32, i32 addrspace(4)* %tmp3
+ %tmp = addrspacecast float addrspace(1)* %input to float*
+ %tmp1 = getelementptr float, float* %tmp, i32 %i
+ %tmp2 = getelementptr float, float* %tmp1, i32 %j
+ %tmp3 = bitcast float* %tmp2 to i32*
+ %tmp4 = load i32, i32* %tmp3
ret i32 %tmp4
}
; CHECK-LABEL: @nested_const_expr(
; CHECK: store i32 1, i32 addrspace(3)* bitcast (float addrspace(3)* getelementptr inbounds ([10 x float], [10 x float] addrspace(3)* @array, i64 0, i64 1) to i32 addrspace(3)*), align 4
define amdgpu_kernel void @nested_const_expr() #0 {
- store i32 1, i32 addrspace(4)* bitcast (float addrspace(4)* getelementptr ([10 x float], [10 x float] addrspace(4)* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float] addrspace(4)*), i64 0, i64 1) to i32 addrspace(4)*), align 4
+ store i32 1, i32* bitcast (float* getelementptr ([10 x float], [10 x float]* addrspacecast ([10 x float] addrspace(3)* @array to [10 x float]*), i64 0, i64 1) to i32*), align 4
ret void
}
@@ -95,10 +95,10 @@ define amdgpu_kernel void @nested_const_expr() #0 {
; CHECK-NEXT: ret void
define amdgpu_kernel void @rauw(float addrspace(1)* %input) #0 {
bb:
- %generic_input = addrspacecast float addrspace(1)* %input to float addrspace(4)*
- %addr = getelementptr float, float addrspace(4)* %generic_input, i64 10
- %v = load float, float addrspace(4)* %addr
- store float %v, float addrspace(4)* %addr
+ %generic_input = addrspacecast float addrspace(1)* %input to float*
+ %addr = getelementptr float, float* %generic_input, i64 10
+ %v = load float, float* %addr
+ store float %v, float* %addr
ret void
}
@@ -119,27 +119,27 @@ bb:
; CHECK: br i1 %exit_cond, label %exit, label %loop
define amdgpu_kernel void @loop() #0 {
entry:
- %p = addrspacecast [10 x float] addrspace(3)* @array to float addrspace(4)*
- %end = getelementptr float, float addrspace(4)* %p, i64 10
+ %p = addrspacecast [10 x float] addrspace(3)* @array to float*
+ %end = getelementptr float, float* %p, i64 10
br label %loop
loop: ; preds = %loop, %entry
- %i = phi float addrspace(4)* [ %p, %entry ], [ %i2, %loop ]
- %v = load float, float addrspace(4)* %i
+ %i = phi float* [ %p, %entry ], [ %i2, %loop ]
+ %v = load float, float* %i
call void @use(float %v)
- %i2 = getelementptr float, float addrspace(4)* %i, i64 1
- %exit_cond = icmp eq float addrspace(4)* %i2, %end
+ %i2 = getelementptr float, float* %i, i64 1
+ %exit_cond = icmp eq float* %i2, %end
br i1 %exit_cond, label %exit, label %loop
exit: ; preds = %loop
ret void
}
-@generic_end = external addrspace(1) global float addrspace(4)*
+@generic_end = external addrspace(1) global float*
; CHECK-LABEL: @loop_with_generic_bound(
; CHECK: %p = bitcast [10 x float] addrspace(3)* @array to float addrspace(3)*
-; CHECK: %end = load float addrspace(4)*, float addrspace(4)* addrspace(1)* @generic_end
+; CHECK: %end = load float*, float* addrspace(1)* @generic_end
; CHECK: br label %loop
; CHECK: loop:
@@ -147,21 +147,21 @@ exit: ; preds = %loop
; CHECK: %v = load float, float addrspace(3)* %i
; CHECK: call void @use(float %v)
; CHECK: %i2 = getelementptr float, float addrspace(3)* %i, i64 1
-; CHECK: %0 = addrspacecast float addrspace(3)* %i2 to float addrspace(4)*
-; CHECK: %exit_cond = icmp eq float addrspace(4)* %0, %end
+; CHECK: %0 = addrspacecast float addrspace(3)* %i2 to float*
+; CHECK: %exit_cond = icmp eq float* %0, %end
; CHECK: br i1 %exit_cond, label %exit, label %loop
define amdgpu_kernel void @loop_with_generic_bound() #0 {
entry:
- %p = addrspacecast [10 x float] addrspace(3)* @array to float addrspace(4)*
- %end = load float addrspace(4)*, float addrspace(4)* addrspace(1)* @generic_end
+ %p = addrspacecast [10 x float] addrspace(3)* @array to float*
+ %end = load float*, float* addrspace(1)* @generic_end
br label %loop
loop: ; preds = %loop, %entry
- %i = phi float addrspace(4)* [ %p, %entry ], [ %i2, %loop ]
- %v = load float, float addrspace(4)* %i
+ %i = phi float* [ %p, %entry ], [ %i2, %loop ]
+ %v = load float, float* %i
call void @use(float %v)
- %i2 = getelementptr float, float addrspace(4)* %i, i64 1
- %exit_cond = icmp eq float addrspace(4)* %i2, %end
+ %i2 = getelementptr float, float* %i, i64 1
+ %exit_cond = icmp eq float* %i2, %end
br i1 %exit_cond, label %exit, label %loop
exit: ; preds = %loop
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll
index 74a2595252d..2d4bf148d84 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-addrspacecast.ll
@@ -8,9 +8,9 @@
; CHECK-NEXT: store i32 8, i32 addrspace(3)* %gep0, align 8
; CHECK-NEXT: ret void
define void @addrspacecast_gep_addrspacecast(i32 addrspace(3)* %ptr) {
- %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
- %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)*
+ %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32*
+ %gep0 = getelementptr i32, i32* %asc0, i64 9
+ %asc1 = addrspacecast i32* %gep0 to i32 addrspace(3)*
store i32 8, i32 addrspace(3)* %asc1, align 8
ret void
}
@@ -21,9 +21,9 @@ define void @addrspacecast_gep_addrspacecast(i32 addrspace(3)* %ptr) {
; CHECK-NEXT: store i8 8, i8 addrspace(3)* [[CAST]], align 8
; CHECK-NEXT: ret void
define void @addrspacecast_different_pointee_type(i32 addrspace(3)* %ptr) {
- %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
- %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i8 addrspace(3)*
+ %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32*
+ %gep0 = getelementptr i32, i32* %asc0, i64 9
+ %asc1 = addrspacecast i32* %gep0 to i8 addrspace(3)*
store i8 8, i8 addrspace(3)* %asc1, align 8
ret void
}
@@ -33,24 +33,24 @@ define void @addrspacecast_different_pointee_type(i32 addrspace(3)* %ptr) {
; CHECK-NEXT: store volatile i32 addrspace(3)* %gep0, i32 addrspace(3)* addrspace(1)* undef
; CHECK-NEXT: ret void
define void @addrspacecast_to_memory(i32 addrspace(3)* %ptr) {
- %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
- %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)*
+ %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32*
+ %gep0 = getelementptr i32, i32* %asc0, i64 9
+ %asc1 = addrspacecast i32* %gep0 to i32 addrspace(3)*
store volatile i32 addrspace(3)* %asc1, i32 addrspace(3)* addrspace(1)* undef
ret void
}
; CHECK-LABEL: @multiuse_addrspacecast_gep_addrspacecast(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
-; CHECK-NEXT: store volatile i32 addrspace(4)* %1, i32 addrspace(4)* addrspace(1)* undef
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %ptr to i32*
+; CHECK-NEXT: store volatile i32* %1, i32* addrspace(1)* undef
; CHECK-NEXT: %gep0 = getelementptr i32, i32 addrspace(3)* %ptr, i64 9
; CHECK-NEXT: store i32 8, i32 addrspace(3)* %gep0, align 8
; CHECK-NEXT: ret void
define void @multiuse_addrspacecast_gep_addrspacecast(i32 addrspace(3)* %ptr) {
- %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
- store volatile i32 addrspace(4)* %asc0, i32 addrspace(4)* addrspace(1)* undef
- %gep0 = getelementptr i32, i32 addrspace(4)* %asc0, i64 9
- %asc1 = addrspacecast i32 addrspace(4)* %gep0 to i32 addrspace(3)*
+ %asc0 = addrspacecast i32 addrspace(3)* %ptr to i32*
+ store volatile i32* %asc0, i32* addrspace(1)* undef
+ %gep0 = getelementptr i32, i32* %asc0, i64 9
+ %asc1 = addrspacecast i32* %gep0 to i32 addrspace(3)*
store i32 8, i32 addrspace(3)* %asc1, align 8
ret void
}
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
index e2c255dcb3e..f9b788f07fd 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/infer-getelementptr.ll
@@ -9,8 +9,8 @@
; CHECK: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
define void @simplified_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
- %gep0 = getelementptr inbounds double, double addrspace(4)* addrspacecast (double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384) to double addrspace(4)*), i64 %idx0
- %asc = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
+ %gep0 = getelementptr inbounds double, double* addrspacecast (double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384) to double*), i64 %idx0
+ %asc = addrspacecast double* %gep0 to double addrspace(3)*
store double 1.000000e+00, double addrspace(3)* %asc, align 8
ret void
}
@@ -19,8 +19,8 @@ define void @simplified_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
; CHECK-NEXT: %gep0 = getelementptr inbounds double, double addrspace(3)* getelementptr inbounds ([648 x double], [648 x double] addrspace(3)* @lds, i64 0, i64 384), i64 %idx0
; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep0, align 8
define void @constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
- %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
- %asc = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
+ %gep0 = getelementptr inbounds double, double* getelementptr ([648 x double], [648 x double]* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double]*), i64 0, i64 384), i64 %idx0
+ %asc = addrspacecast double* %gep0 to double addrspace(3)*
store double 1.0, double addrspace(3)* %asc, align 8
ret void
}
@@ -30,27 +30,27 @@ define void @constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
; CHECK-NEXT: %gep1 = getelementptr inbounds double, double addrspace(3)* %gep0, i64 %idx1
; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep1, align 8
define void @constexpr_gep_gep_addrspacecast(i64 %idx0, i64 %idx1) {
- %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
- %gep1 = getelementptr inbounds double, double addrspace(4)* %gep0, i64 %idx1
- %asc = addrspacecast double addrspace(4)* %gep1 to double addrspace(3)*
+ %gep0 = getelementptr inbounds double, double* getelementptr ([648 x double], [648 x double]* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double]*), i64 0, i64 384), i64 %idx0
+ %gep1 = getelementptr inbounds double, double* %gep0, i64 %idx1
+ %asc = addrspacecast double* %gep1 to double addrspace(3)*
store double 1.0, double addrspace(3)* %asc, align 8
ret void
}
; Don't crash
; CHECK-LABEL: @vector_gep(
-; CHECK: %cast = addrspacecast <4 x [1024 x i32] addrspace(3)*> %array to <4 x [1024 x i32] addrspace(4)*>
+; CHECK: %cast = addrspacecast <4 x [1024 x i32] addrspace(3)*> %array to <4 x [1024 x i32]*>
define amdgpu_kernel void @vector_gep(<4 x [1024 x i32] addrspace(3)*> %array) nounwind {
- %cast = addrspacecast <4 x [1024 x i32] addrspace(3)*> %array to <4 x [1024 x i32] addrspace(4)*>
- %p = getelementptr [1024 x i32], <4 x [1024 x i32] addrspace(4)*> %cast, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
- %p0 = extractelement <4 x i32 addrspace(4)*> %p, i32 0
- %p1 = extractelement <4 x i32 addrspace(4)*> %p, i32 1
- %p2 = extractelement <4 x i32 addrspace(4)*> %p, i32 2
- %p3 = extractelement <4 x i32 addrspace(4)*> %p, i32 3
- store i32 99, i32 addrspace(4)* %p0
- store i32 99, i32 addrspace(4)* %p1
- store i32 99, i32 addrspace(4)* %p2
- store i32 99, i32 addrspace(4)* %p3
+ %cast = addrspacecast <4 x [1024 x i32] addrspace(3)*> %array to <4 x [1024 x i32]*>
+ %p = getelementptr [1024 x i32], <4 x [1024 x i32]*> %cast, <4 x i16> zeroinitializer, <4 x i16> <i16 16, i16 16, i16 16, i16 16>
+ %p0 = extractelement <4 x i32*> %p, i32 0
+ %p1 = extractelement <4 x i32*> %p, i32 1
+ %p2 = extractelement <4 x i32*> %p, i32 2
+ %p3 = extractelement <4 x i32*> %p, i32 3
+ store i32 99, i32* %p0
+ store i32 99, i32* %p1
+ store i32 99, i32* %p2
+ store i32 99, i32* %p3
ret void
}
@@ -61,12 +61,12 @@ define amdgpu_kernel void @vector_gep(<4 x [1024 x i32] addrspace(3)*> %array) n
; CHECK-NEXT: store double 1.000000e+00, double addrspace(3)* %gep1, align 8
; CHECK-NEXT: ret void
define void @repeated_constexpr_gep_addrspacecast(i64 %idx0, i64 %idx1) {
- %gep0 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx0
- %asc0 = addrspacecast double addrspace(4)* %gep0 to double addrspace(3)*
+ %gep0 = getelementptr inbounds double, double* getelementptr ([648 x double], [648 x double]* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double]*), i64 0, i64 384), i64 %idx0
+ %asc0 = addrspacecast double* %gep0 to double addrspace(3)*
store double 1.0, double addrspace(3)* %asc0, align 8
- %gep1 = getelementptr inbounds double, double addrspace(4)* getelementptr ([648 x double], [648 x double] addrspace(4)* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double] addrspace(4)*), i64 0, i64 384), i64 %idx1
- %asc1 = addrspacecast double addrspace(4)* %gep1 to double addrspace(3)*
+ %gep1 = getelementptr inbounds double, double* getelementptr ([648 x double], [648 x double]* addrspacecast ([648 x double] addrspace(3)* @lds to [648 x double]*), i64 0, i64 384), i64 %idx1
+ %asc1 = addrspacecast double* %gep1 to double addrspace(3)*
store double 1.0, double addrspace(3)* %asc1, align 8
ret void
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
index ca6138d3fb0..723ce41588a 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
@@ -3,143 +3,143 @@
; CHECK-LABEL: @objectsize_group_to_flat_i32(
; CHECK: %val = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %group.ptr, i1 true, i1 false)
define i32 @objectsize_group_to_flat_i32(i8 addrspace(3)* %group.ptr) #0 {
- %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
- %val = call i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)* %cast, i1 true, i1 false)
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+ %val = call i32 @llvm.objectsize.i32.p0i8(i8* %cast, i1 true, i1 false)
ret i32 %val
}
; CHECK-LABEL: @objectsize_global_to_flat_i64(
; CHECK: %val = call i64 @llvm.objectsize.i64.p3i8(i8 addrspace(3)* %global.ptr, i1 true, i1 false)
define i64 @objectsize_global_to_flat_i64(i8 addrspace(3)* %global.ptr) #0 {
- %cast = addrspacecast i8 addrspace(3)* %global.ptr to i8 addrspace(4)*
- %val = call i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)* %cast, i1 true, i1 false)
+ %cast = addrspacecast i8 addrspace(3)* %global.ptr to i8*
+ %val = call i64 @llvm.objectsize.i64.p0i8(i8* %cast, i1 true, i1 false)
ret i64 %val
}
; CHECK-LABEL: @atomicinc_global_to_flat_i32(
; CHECK: call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %y, i32 0, i32 0, i1 false)
define i32 @atomicinc_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %cast, i32 %y, i32 0, i32 0, i1 false)
ret i32 %ret
}
; CHECK-LABEL: @atomicinc_group_to_flat_i32(
; CHECK: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %y, i32 0, i32 0, i1 false)
define i32 @atomicinc_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %cast, i32 %y, i32 0, i32 0, i1 false)
ret i32 %ret
}
; CHECK-LABEL: @atomicinc_global_to_flat_i64(
; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y, i32 0, i32 0, i1 false)
define i64 @atomicinc_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 false)
ret i64 %ret
}
; CHECK-LABEL: @atomicinc_group_to_flat_i64(
; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y, i32 0, i32 0, i1 false)
define i64 @atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 false)
ret i64 %ret
}
; CHECK-LABEL: @atomicdec_global_to_flat_i32(
; CHECK: call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %val, i32 0, i32 0, i1 false)
define i32 @atomicdec_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 false)
ret i32 %ret
}
; CHECK-LABEL: @atomicdec_group_to_flat_i32(
; CHECK: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %val, i32 0, i32 0, i1 false)
define i32 @atomicdec_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 false)
ret i32 %ret
}
; CHECK-LABEL: @atomicdec_global_to_flat_i64(
; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y, i32 0, i32 0, i1 false)
define i64 @atomicdec_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 false)
ret i64 %ret
}
; CHECK-LABEL: @atomicdec_group_to_flat_i64(
; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y, i32 0, i32 0, i1 false
define i64 @atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 false)
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 false)
ret i64 %ret
}
; CHECK-LABEL: @volatile_atomicinc_group_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %1, i64 %y, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 true)
define i64 @volatile_atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 true)
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true)
ret i64 %ret
}
; CHECK-LABEL: @volatile_atomicdec_global_to_flat_i32(
-; CHECK-NEXT: %1 = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
-; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %1, i32 %val, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %1 = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %1, i32 %val, i32 0, i32 0, i1 true)
define i32 @volatile_atomicdec_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val, i32 0, i32 0, i1 true)
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 true)
ret i32 %ret
}
; CHECK-LABEL: @volatile_atomicdec_group_to_flat_i32(
-; CHECK-NEXT: %1 = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
-; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %1, i32 %val, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %1 = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+; CHECK-NEXT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %1, i32 %val, i32 0, i32 0, i1 true)
define i32 @volatile_atomicdec_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val, i32 0, i32 0, i1 true)
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %cast, i32 %val, i32 0, i32 0, i1 true)
ret i32 %ret
}
; CHECK-LABEL: @volatile_atomicdec_global_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %1, i64 %y, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %1 = addrspacecast i64 addrspace(1)* %global.ptr to i64*
+; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 true)
define i64 @volatile_atomicdec_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 true)
+ %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true)
ret i64 %ret
}
; CHECK-LABEL: @volatile_atomicdec_group_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %1, i64 %y, i32 0, i32 0, i1 true)
+; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 true)
define i64 @volatile_atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
- %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 true)
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 true)
ret i64 %ret
}
; CHECK-LABEL: @invalid_variable_volatile_atomicinc_group_to_flat_i64(
-; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
-; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %1, i64 %y, i32 0, i32 0, i1 %volatile.var)
+; CHECK-NEXT: %1 = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+; CHECK-NEXT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %1, i64 %y, i32 0, i32 0, i1 %volatile.var)
define i64 @invalid_variable_volatile_atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y, i1 %volatile.var) #0 {
- %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
- %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y, i32 0, i32 0, i1 %volatile.var)
+ %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64*
+ %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %cast, i64 %y, i32 0, i32 0, i1 %volatile.var)
ret i64 %ret
}
-declare i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)*, i1, i1) #1
-declare i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)*, i1, i1) #1
-declare i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* nocapture, i32, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* nocapture, i64, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64, i32, i32, i1) #2
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1) #1
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1) #1
+declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
+declare i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
index dd0bbfdc6d2..d8987f8b630 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll
@@ -3,100 +3,100 @@
; CHECK-LABEL: @memset_group_to_flat(
; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* align 4 %group.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memset_global_to_flat(
; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* align 4 %global.ptr, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memset_group_to_flat_no_md(
; CHECK: call void @llvm.memset.p3i8.i64(i8 addrspace(3)* align 4 %group.ptr, i8 4, i64 %size, i1 false){{$}}
define amdgpu_kernel void @memset_group_to_flat_no_md(i8 addrspace(3)* %group.ptr, i64 %size) #0 {
- %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 %size, i1 false)
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 %size, i1 false)
ret void
}
; CHECK-LABEL: @memset_global_to_flat_no_md(
; CHECK: call void @llvm.memset.p1i8.i64(i8 addrspace(1)* align 4 %global.ptr, i8 4, i64 %size, i1 false){{$}}
define amdgpu_kernel void @memset_global_to_flat_no_md(i8 addrspace(1)* %global.ptr, i64 %size) #0 {
- %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 %size, i1 false)
+ %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 %size, i1 false)
ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group(
-; CHCK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group(i8* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_with_group(
-; CHECK: call void @llvm.memcpy.p3i8.p4i8.i64(i8 addrspace(3)* align 4 %dest.group.ptr, i8 addrspace(4)* align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(4)* %src.ptr, i64 %size) #0 {
- %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %cast.dest, i8 addrspace(4)* align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+; CHECK: call void @llvm.memcpy.p3i8.p0i8.i64(i8 addrspace(3)* align 4 %dest.group.ptr, i8* align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_with_group(i8 addrspace(3)* %dest.group.ptr, i8* %src.ptr, i64 %size) #0 {
+ %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8*
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %cast.dest, i8* align 4 %src.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_src_with_group(
; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* align 4 %src.group.ptr, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_src_with_group(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- %cast.dest = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %cast.dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ %cast.dest = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %cast.dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_dest_group_src_global(
; CHECK: call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* align 4 %dest.group.ptr, i8 addrspace(1)* align 4 %src.global.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memcpy_flat_to_flat_replace_dest_group_src_global(i8 addrspace(3)* %dest.group.ptr, i8 addrspace(1)* %src.global.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(1)* %src.global.ptr to i8 addrspace(4)*
- %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %cast.dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast.src = addrspacecast i8 addrspace(1)* %src.global.ptr to i8*
+ %cast.dest = addrspacecast i8 addrspace(3)* %dest.group.ptr to i8*
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %cast.dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memcpy_group_to_flat_replace_dest_global(
; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* align 4 %dest.global.ptr, i8 addrspace(3)* align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memcpy_group_to_flat_replace_dest_global(i8 addrspace(1)* %dest.global.ptr, i8 addrspace(3)* %src.group.ptr, i32 %size) #0 {
- %cast.dest = addrspacecast i8 addrspace(1)* %dest.global.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* align 4 %cast.dest, i8 addrspace(3)* align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast.dest = addrspacecast i8 addrspace(1)* %dest.global.ptr to i8*
+ call void @llvm.memcpy.p0i8.p3i8.i32(i8* align 4 %cast.dest, i8 addrspace(3)* align 4 %src.group.ptr, i32 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(
-; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa.struct !7
-define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa.struct !7
+; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa.struct !7
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_tbaa_struct(i8* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa.struct !7
ret void
}
; CHECK-LABEL: @memcpy_flat_to_flat_replace_src_with_group_no_md(
-; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
-define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
+define amdgpu_kernel void @memcpy_flat_to_flat_replace_src_with_group_no_md(i8* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 %size, i1 false)
ret void
}
; CHECK-LABEL: @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(
-; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest0, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
-; CHECK: call void @llvm.memcpy.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest1, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
-define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8 addrspace(4)* %dest0, i8 addrspace(4)* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest0, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false)
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest1, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false)
+; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest0, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
+; CHECK: call void @llvm.memcpy.p0i8.p3i8.i64(i8* align 4 %dest1, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false){{$}}
+define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_no_md(i8* %dest0, i8* %dest1, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %dest0, i8* align 4 %cast.src, i64 %size, i1 false)
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %dest1, i8* align 4 %cast.src, i64 %size, i1 false)
ret void
}
@@ -104,22 +104,22 @@ define amdgpu_kernel void @multiple_memcpy_flat_to_flat_replace_src_with_group_n
; CHECK-LABEL: @memcpy_group_flat_to_flat_self(
; CHECK: call void @llvm.memcpy.p3i8.p3i8.i64(i8 addrspace(3)* align 4 %group.ptr, i8 addrspace(3)* align 4 %group.ptr, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
define amdgpu_kernel void @memcpy_group_flat_to_flat_self(i8 addrspace(3)* %group.ptr) #0 {
- %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
- call void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 addrspace(4)* align 4 %cast, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+ call void @llvm.memcpy.p4i8.p0i8.i64(i8* align 4 %cast, i8* align 4 %cast, i64 32, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
; CHECK-LABEL: @memmove_flat_to_flat_replace_src_with_group(
-; CHECK: call void @llvm.memmove.p4i8.p3i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
-define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(i8 addrspace(4)* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
- %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8 addrspace(4)*
- call void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* align 4 %dest, i8 addrspace(4)* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+; CHECK: call void @llvm.memmove.p0i8.p3i8.i64(i8* align 4 %dest, i8 addrspace(3)* align 4 %src.group.ptr, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
+define amdgpu_kernel void @memmove_flat_to_flat_replace_src_with_group(i8* %dest, i8 addrspace(3)* %src.group.ptr, i64 %size) #0 {
+ %cast.src = addrspacecast i8 addrspace(3)* %src.group.ptr to i8*
+ call void @llvm.memmove.p4i8.p0i8.i64(i8* align 4 %dest, i8* align 4 %cast.src, i64 %size, i1 false), !tbaa !0, !alias.scope !3, !noalias !4
ret void
}
-declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i1) #1
-declare void @llvm.memcpy.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i1) #1
-declare void @llvm.memcpy.p4i8.p3i8.i32(i8 addrspace(4)* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i32, i1) #1
-declare void @llvm.memmove.p4i8.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8 addrspace(4)* nocapture readonly, i64, i1) #1
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
+declare void @llvm.memcpy.p4i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
+declare void @llvm.memcpy.p0i8.p3i8.i32(i8* nocapture writeonly, i8 addrspace(3)* nocapture readonly, i32, i1) #1
+declare void @llvm.memmove.p4i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll
index 3231b6ccf1c..2080c51b66f 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/old-pass-regressions.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
+; RUN: opt -data-layout=A5 -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
; Regression tests from old HSAIL addrspacecast optimization pass
@@ -14,7 +14,7 @@ entry:
%tmp1 = call i32 @llvm.amdgcn.workitem.id.x()
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 %tmp2, %arg0
- %vecload1 = load <2 x double>, <2 x double> addrspace(4)* bitcast (double addrspace(4)* getelementptr ([100 x double], [100 x double] addrspace(4)* addrspacecast ([100 x double] addrspace(1)* @data to [100 x double] addrspace(4)*), i64 0, i64 4) to <2 x double> addrspace(4)*), align 8
+ %vecload1 = load <2 x double>, <2 x double>* bitcast (double* getelementptr ([100 x double], [100 x double]* addrspacecast ([100 x double] addrspace(1)* @data to [100 x double]*), i64 0, i64 4) to <2 x double>*), align 8
%cmp = fcmp ord <2 x double> %vecload1, zeroinitializer
%sext = sext <2 x i1> %cmp to <2 x i64>
%tmp4 = extractelement <2 x i64> %sext, i64 0
@@ -30,7 +30,7 @@ entry:
@generic_address_bug9749.val = internal addrspace(1) global float 0.0, align 4
-declare i32 @_Z9get_fencePU3AS4v(i8 addrspace(4)*)
+declare i32 @_Z9get_fencePv(i8*)
%opencl.pipe_t = type opaque
; This is a compile time assert bug, but we still want to check optimization
@@ -53,24 +53,24 @@ entry:
; Should generate flat load
; CHECK-LABEL: @generic_address_bug9749(
; CHECK: br i1
-; CHECK: load float, float addrspace(4)*
+; CHECK: load float, float*
; CHECK: br label
define amdgpu_kernel void @generic_address_bug9749(i32 addrspace(1)* nocapture %results) #0 {
entry:
- %ptr = alloca float addrspace(4)*, align 8
+ %ptr = alloca float*, align 8, addrspace(5)
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = zext i32 %tmp to i64
store float 0x3FB99999A0000000, float addrspace(1)* @generic_address_bug9749.val, align 4
- store volatile float addrspace(4)* addrspacecast (float addrspace(1)* @generic_address_bug9749.val to float addrspace(4)*), float addrspace(4)** %ptr, align 8
- %tmp2 = load volatile float addrspace(4)*, float addrspace(4)** %ptr, align 8
+ store volatile float* addrspacecast (float addrspace(1)* @generic_address_bug9749.val to float*), float* addrspace(5)* %ptr, align 8
+ %tmp2 = load volatile float*, float* addrspace(5)* %ptr, align 8
%tmp3 = load float, float addrspace(1)* @generic_address_bug9749.val, align 4
- %tmp4 = bitcast float addrspace(4)* %tmp2 to i8 addrspace(4)*
- %call.i = call i32 @_Z9get_fencePU3AS4v(i8 addrspace(4)* %tmp4) #1
+ %tmp4 = bitcast float* %tmp2 to i8*
+ %call.i = call i32 @_Z9get_fencePv(i8* %tmp4) #1
%switch.i.i = icmp ult i32 %call.i, 4
br i1 %switch.i.i, label %if.end.i, label %helperFunction.exit
if.end.i: ; preds = %entry
- %tmp5 = load float, float addrspace(4)* %tmp2, align 4
+ %tmp5 = load float, float* %tmp2, align 4
%not.cmp.i = fcmp oeq float %tmp5, %tmp3
%phitmp = zext i1 %not.cmp.i to i32
br label %helperFunction.exit
@@ -91,14 +91,14 @@ entry:
br i1 %cmp1, label %for.end, label %for.body.lr.ph
for.body.lr.ph: ; preds = %entry
- %tmp = addrspacecast i32 addrspace(3)* %in to i32 addrspace(4)*
+ %tmp = addrspacecast i32 addrspace(3)* %in to i32*
br label %for.body
for.body: ; preds = %for.body, %for.body.lr.ph
%i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
- %ptr.02 = phi i32 addrspace(4)* [ %tmp, %for.body.lr.ph ], [ %add.ptr, %for.body ]
- store i32 %i.03, i32 addrspace(4)* %ptr.02, align 4
- %add.ptr = getelementptr inbounds i32, i32 addrspace(4)* %ptr.02, i64 4
+ %ptr.02 = phi i32* [ %tmp, %for.body.lr.ph ], [ %add.ptr, %for.body ]
+ store i32 %i.03, i32* %ptr.02, align 4
+ %add.ptr = getelementptr inbounds i32, i32* %ptr.02, i64 4
%inc = add nuw i32 %i.03, 1
%exitcond = icmp eq i32 %inc, %numElems
br i1 %exitcond, label %for.end, label %for.body
@@ -116,23 +116,23 @@ entry:
%tmp2 = zext i32 %tmp1 to i64
%tmp3 = add i64 %tmp2, %arg0
%sext = shl i64 %tmp3, 32
- %tmp4 = addrspacecast i32 addrspace(3)* %destValues to i32 addrspace(4)*
- %tmp5 = addrspacecast i32 addrspace(3)* %sourceA to i32 addrspace(4)*
+ %tmp4 = addrspacecast i32 addrspace(3)* %destValues to i32*
+ %tmp5 = addrspacecast i32 addrspace(3)* %sourceA to i32*
%tmp6 = ashr exact i64 %sext, 31
- %tmp7 = getelementptr inbounds i32, i32 addrspace(4)* %tmp5, i64 %tmp6
- %arrayidx_v4 = bitcast i32 addrspace(4)* %tmp7 to <2 x i32> addrspace(4)*
- %vecload = load <2 x i32>, <2 x i32> addrspace(4)* %arrayidx_v4, align 4
+ %tmp7 = getelementptr inbounds i32, i32* %tmp5, i64 %tmp6
+ %arrayidx_v4 = bitcast i32* %tmp7 to <2 x i32>*
+ %vecload = load <2 x i32>, <2 x i32>* %arrayidx_v4, align 4
%tmp8 = extractelement <2 x i32> %vecload, i32 0
%tmp9 = extractelement <2 x i32> %vecload, i32 1
%tmp10 = icmp eq i32 %tmp8, 0
%tmp11 = select i1 %tmp10, i32 32, i32 %tmp8
%tmp12 = icmp eq i32 %tmp9, 0
%tmp13 = select i1 %tmp12, i32 32, i32 %tmp9
- %tmp14 = getelementptr inbounds i32, i32 addrspace(4)* %tmp4, i64 %tmp6
+ %tmp14 = getelementptr inbounds i32, i32* %tmp4, i64 %tmp6
%tmp15 = insertelement <2 x i32> undef, i32 %tmp11, i32 0
%tmp16 = insertelement <2 x i32> %tmp15, i32 %tmp13, i32 1
- %arrayidx_v41 = bitcast i32 addrspace(4)* %tmp14 to <2 x i32> addrspace(4)*
- store <2 x i32> %tmp16, <2 x i32> addrspace(4)* %arrayidx_v41, align 4
+ %arrayidx_v41 = bitcast i32* %tmp14 to <2 x i32>*
+ store <2 x i32> %tmp16, <2 x i32>* %arrayidx_v41, align 4
ret void
}
@@ -140,4 +140,4 @@ declare i32 @llvm.amdgcn.workitem.id.x() #2
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
-attributes #2 = { nounwind readnone }
\ No newline at end of file
+attributes #2 = { nounwind readnone }
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll
index 08edc20ecf9..598bb68dc29 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/select.ll
@@ -4,25 +4,25 @@
; this doesn't do something insane on non-canonical IR.
; CHECK-LABEL: @return_select_group_flat(
-; CHECK-NEXT: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK-NEXT: %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
-; CHECK-NEXT: %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1
-; CHECK-NEXT: ret i32 addrspace(4)* %select
-define i32 addrspace(4)* @return_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1
- ret i32 addrspace(4)* %select
+; CHECK-NEXT: %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK-NEXT: %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+; CHECK-NEXT: %select = select i1 %c, i32* %cast0, i32* %cast1
+; CHECK-NEXT: ret i32* %select
+define i32* @return_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %select = select i1 %c, i32* %cast0, i32* %cast1
+ ret i32* %select
}
; CHECK-LABEL: @store_select_group_flat(
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1
- store i32 -1, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %select = select i1 %c, i32* %cast0, i32* %cast1
+ store i32 -1, i32* %select
ret void
}
@@ -31,23 +31,23 @@ define amdgpu_kernel void @store_select_group_flat(i1 %c, i32 addrspace(3)* %gro
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1, !prof !0
; CHECK: %load = load i32, i32 addrspace(3)* %select
define i32 @load_select_group_flat_md(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* %group.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1, !prof !0
- %load = load i32, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(3)* %group.ptr.1 to i32*
+ %select = select i1 %c, i32* %cast0, i32* %cast1, !prof !0
+ %load = load i32, i32* %select
ret i32 %load
}
; CHECK-LABEL: @store_select_mismatch_group_private_flat(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %2 = addrspacecast i32* %private.ptr.1 to i32 addrspace(4)*
-; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* %2
-; CHECK: store i32 -1, i32 addrspace(4)* %select
-define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32* %private.ptr.1) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %cast1 = addrspacecast i32* %private.ptr.1 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* %cast1
- store i32 -1, i32 addrspace(4)* %select
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %2 = addrspacecast i32 addrspace(5)* %private.ptr.1 to i32*
+; CHECK: %select = select i1 %c, i32* %1, i32* %2
+; CHECK: store i32 -1, i32* %select
+define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(5)* %private.ptr.1) #0 {
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %cast1 = addrspacecast i32 addrspace(5)* %private.ptr.1 to i32*
+ %select = select i1 %c, i32* %cast0, i32* %cast1
+ store i32 -1, i32* %select
ret void
}
@@ -58,35 +58,35 @@ define amdgpu_kernel void @store_select_mismatch_group_private_flat(i1 %c, i32 a
; CHECK: %tmp = load i32, i32 addrspace(3)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(3)* @lds0, i32 addrspace(3)* @lds1)
define i32 @constexpr_select_group_flat() #0 {
bb:
- %tmp = load i32, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds1 to i32 addrspace(4)*))
+ %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(3)* @lds1 to i32*))
ret i32 %tmp
}
; CHECK-LABEL: @constexpr_select_group_global_flat_mismatch(
-; CHECK: %tmp = load i32, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*))
+; CHECK: %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*))
define i32 @constexpr_select_group_global_flat_mismatch() #0 {
bb:
- %tmp = load i32, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*))
+ %tmp = load i32, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*))
ret i32 %tmp
}
; CHECK-LABEL: @store_select_group_flat_null(
-; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
+; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*)
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* null
- store i32 -1, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* null
+ store i32 -1, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_flat_null_swap(
-; CHECK: %select = select i1 %c, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), i32 addrspace(3)* %group.ptr.0
+; CHECK: %select = select i1 %c, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*), i32 addrspace(3)* %group.ptr.0
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_null_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* null, i32 addrspace(4)* %cast0
- store i32 -1, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* null, i32* %cast0
+ store i32 -1, i32* %select
ret void
}
@@ -94,9 +94,9 @@ define amdgpu_kernel void @store_select_group_flat_null_swap(i1 %c, i32 addrspac
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* undef
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* undef
- store i32 -1, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* undef
+ store i32 -1, i32* %select
ret void
}
@@ -104,21 +104,21 @@ define amdgpu_kernel void @store_select_group_flat_undef(i1 %c, i32 addrspace(3)
; CHECK: %select = select i1 %c, i32 addrspace(3)* undef, i32 addrspace(3)* %group.ptr.0
; CHECK: store i32 -1, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_undef_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* undef, i32 addrspace(4)* %cast0
- store i32 -1, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* undef, i32* %cast0
+ store i32 -1, i32* %select
ret void
}
; CHECK-LABEL: @store_select_gep_group_flat_null(
-; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*)
+; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* null to i32 addrspace(3)*)
; CHECK: %gep = getelementptr i32, i32 addrspace(3)* %select, i64 16
; CHECK: store i32 -1, i32 addrspace(3)* %gep
define amdgpu_kernel void @store_select_gep_group_flat_null(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* null
- %gep = getelementptr i32, i32 addrspace(4)* %select, i64 16
- store i32 -1, i32 addrspace(4)* %gep
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* null
+ %gep = getelementptr i32, i32* %select, i64 16
+ store i32 -1, i32* %gep
ret void
}
@@ -128,19 +128,19 @@ define amdgpu_kernel void @store_select_gep_group_flat_null(i1 %c, i32 addrspace
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* @lds1
; CHECK: store i32 7, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds1 to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(3)* @lds1 to i32*)
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_flat_inttoptr_flat(
-; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32 addrspace(4)* inttoptr (i64 12345 to i32 addrspace(4)*) to i32 addrspace(3)*)
+; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* addrspacecast (i32* inttoptr (i64 12345 to i32*) to i32 addrspace(3)*)
; CHECK: store i32 7, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* inttoptr (i64 12345 to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* inttoptr (i64 12345 to i32*)
+ store i32 7, i32* %select
ret void
}
@@ -148,114 +148,114 @@ define amdgpu_kernel void @store_select_group_flat_inttoptr_flat(i1 %c, i32 addr
; CHECK: %select = select i1 %c, i32 addrspace(3)* %group.ptr.0, i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*)
; CHECK-NEXT: store i32 7, i32 addrspace(3)* %select
define amdgpu_kernel void @store_select_group_flat_inttoptr_group(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(3)* inttoptr (i32 400 to i32 addrspace(3)*) to i32*)
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
-; CHECK: store i32 7, i32 addrspace(4)* %select
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %select = select i1 %c, i32* %1, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)
+; CHECK: store i32 7, i32* %select
define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_flat_constexpr_swap(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*), i32 addrspace(4)* %1
-; CHECK: store i32 7, i32 addrspace(4)* %select
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %select = select i1 %c, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*), i32* %1
+; CHECK: store i32 7, i32* %select
define amdgpu_kernel void @store_select_group_global_mismatch_flat_constexpr_swap(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*), i32 addrspace(4)* %cast0
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* addrspacecast (i32 addrspace(1)* @global0 to i32*), i32* %cast0
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_null_null(
-; CHECK: %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)
-; CHECK: store i32 7, i32 addrspace(4)* %select
+; CHECK: %select = select i1 %c, i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)
+; CHECK: store i32 7, i32* %select
define amdgpu_kernel void @store_select_group_global_mismatch_null_null(i1 %c) #0 {
- %select = select i1 %c, i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %select = select i1 %c, i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_null_null_constexpr(
-; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_null_null_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_gv_null_constexpr(
-; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_gv_null_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* @lds0 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* @lds0 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_null_gv_constexpr(
-; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)), align 4
+; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_null_gv_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* @global0 to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* @global0 to i32*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_null_constexpr(
-; CHECK: store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+; CHECK: store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_null_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* inttoptr (i64 123 to i32 addrspace(3)*) to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_inttoptr_flat_null_constexpr(
-; CHECK: store i32 7, i32 addrspace(1)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(1)* addrspacecast (i32 addrspace(4)* inttoptr (i64 123 to i32 addrspace(4)*) to i32 addrspace(1)*), i32 addrspace(1)* null), align 4
+; CHECK: store i32 7, i32 addrspace(1)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(1)* addrspacecast (i32* inttoptr (i64 123 to i32*) to i32 addrspace(1)*), i32 addrspace(1)* null), align 4
define amdgpu_kernel void @store_select_group_global_mismatch_inttoptr_flat_null_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* inttoptr (i64 123 to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* null to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* inttoptr (i64 123 to i32*), i32* addrspacecast (i32 addrspace(1)* null to i32*)), align 4
ret void
}
; CHECK-LABEL: @store_select_group_global_mismatch_undef_undef_constexpr(
; CHECK: store i32 7, i32 addrspace(3)* null
define amdgpu_kernel void @store_select_group_global_mismatch_undef_undef_constexpr() #0 {
- store i32 7, i32 addrspace(4)* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32 addrspace(4)* addrspacecast (i32 addrspace(3)* null to i32 addrspace(4)*), i32 addrspace(4)* addrspacecast (i32 addrspace(1)* undef to i32 addrspace(4)*)), align 4
+ store i32 7, i32* select (i1 icmp eq (i32 ptrtoint (i32 addrspace(3)* @lds1 to i32), i32 4), i32* addrspacecast (i32 addrspace(3)* null to i32*), i32* addrspacecast (i32 addrspace(1)* undef to i32*)), align 4
ret void
}
@lds2 = external addrspace(3) global [1024 x i32], align 4
; CHECK-LABEL: @store_select_group_constexpr_ptrtoint(
-; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
-; CHECK: %select = select i1 %c, i32 addrspace(4)* %1, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32 addrspace(4)*)
-; CHECK: store i32 7, i32 addrspace(4)* %select
+; CHECK: %1 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+; CHECK: %select = select i1 %c, i32* %1, i32* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32*)
+; CHECK: store i32 7, i32* %select
define amdgpu_kernel void @store_select_group_constexpr_ptrtoint(i1 %c, i32 addrspace(3)* %group.ptr.0) #0 {
- %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32 addrspace(4)*
- %select = select i1 %c, i32 addrspace(4)* %cast0, i32 addrspace(4)* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32 addrspace(4)*)
- store i32 7, i32 addrspace(4)* %select
+ %cast0 = addrspacecast i32 addrspace(3)* %group.ptr.0 to i32*
+ %select = select i1 %c, i32* %cast0, i32* addrspacecast (i32 addrspace(1)* inttoptr (i32 add (i32 ptrtoint ([1024 x i32] addrspace(3)* @lds2 to i32), i32 124) to i32 addrspace(1)*) to i32*)
+ store i32 7, i32* %select
ret void
}
; CHECK-LABEL: @store_select_group_flat_vector(
-; CHECK: %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32 addrspace(4)*>
-; CHECK: %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32 addrspace(4)*>
-; CHECK: %select = select i1 %c, <2 x i32 addrspace(4)*> %cast0, <2 x i32 addrspace(4)*> %cast1
-; CHECK: %extract0 = extractelement <2 x i32 addrspace(4)*> %select, i32 0
-; CHECK: %extract1 = extractelement <2 x i32 addrspace(4)*> %select, i32 1
-; CHECK: store i32 -1, i32 addrspace(4)* %extract0
-; CHECK: store i32 -2, i32 addrspace(4)* %extract1
+; CHECK: %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*>
+; CHECK: %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*>
+; CHECK: %select = select i1 %c, <2 x i32*> %cast0, <2 x i32*> %cast1
+; CHECK: %extract0 = extractelement <2 x i32*> %select, i32 0
+; CHECK: %extract1 = extractelement <2 x i32*> %select, i32 1
+; CHECK: store i32 -1, i32* %extract0
+; CHECK: store i32 -2, i32* %extract1
define amdgpu_kernel void @store_select_group_flat_vector(i1 %c, <2 x i32 addrspace(3)*> %group.ptr.0, <2 x i32 addrspace(3)*> %group.ptr.1) #0 {
- %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32 addrspace(4)*>
- %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32 addrspace(4)*>
- %select = select i1 %c, <2 x i32 addrspace(4)*> %cast0, <2 x i32 addrspace(4)*> %cast1
- %extract0 = extractelement <2 x i32 addrspace(4)*> %select, i32 0
- %extract1 = extractelement <2 x i32 addrspace(4)*> %select, i32 1
- store i32 -1, i32 addrspace(4)* %extract0
- store i32 -2, i32 addrspace(4)* %extract1
+ %cast0 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.0 to <2 x i32*>
+ %cast1 = addrspacecast <2 x i32 addrspace(3)*> %group.ptr.1 to <2 x i32*>
+ %select = select i1 %c, <2 x i32*> %cast0, <2 x i32*> %cast1
+ %extract0 = extractelement <2 x i32*> %select, i32 0
+ %extract1 = extractelement <2 x i32*> %select, i32 1
+ store i32 -1, i32* %extract0
+ store i32 -2, i32* %extract1
ret void
}
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
index 49467cea930..6c9449cc8ff 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
@@ -3,138 +3,138 @@
; Check that volatile users of addrspacecast are not replaced.
; CHECK-LABEL: @volatile_load_flat_from_global(
-; CHECK: load volatile i32, i32 addrspace(4)*
+; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(1)*
define amdgpu_kernel void @volatile_load_flat_from_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
- %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
+ %val = load volatile i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_load_flat_from_constant(
-; CHECK: load volatile i32, i32 addrspace(4)*
+; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(1)*
define amdgpu_kernel void @volatile_load_flat_from_constant(i32 addrspace(2)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(2)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
- %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(2)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
+ %val = load volatile i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_load_flat_from_group(
-; CHECK: load volatile i32, i32 addrspace(4)*
+; CHECK: load volatile i32, i32*
; CHECK: store i32 %val, i32 addrspace(3)*
define amdgpu_kernel void @volatile_load_flat_from_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)*
- %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
+ %val = load volatile i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_load_flat_from_private(
-; CHECK: load volatile i32, i32 addrspace(4)*
-; CHECK: store i32 %val, i32*
-define amdgpu_kernel void @volatile_load_flat_from_private(i32* nocapture %input, i32* nocapture %output) #0 {
- %tmp0 = addrspacecast i32* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32* %output to i32 addrspace(4)*
- %val = load volatile i32, i32 addrspace(4)* %tmp0, align 4
- store i32 %val, i32 addrspace(4)* %tmp1, align 4
+; CHECK: load volatile i32, i32*
+; CHECK: store i32 %val, i32 addrspace(5)*
+define amdgpu_kernel void @volatile_load_flat_from_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
+ %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
+ %val = load volatile i32, i32* %tmp0, align 4
+ store i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_store_flat_to_global(
; CHECK: load i32, i32 addrspace(1)*
-; CHECK: store volatile i32 %val, i32 addrspace(4)*
+; CHECK: store volatile i32 %val, i32*
define amdgpu_kernel void @volatile_store_flat_to_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(1)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(1)* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store volatile i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store volatile i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_store_flat_to_group(
; CHECK: load i32, i32 addrspace(3)*
-; CHECK: store volatile i32 %val, i32 addrspace(4)*
+; CHECK: store volatile i32 %val, i32*
define amdgpu_kernel void @volatile_store_flat_to_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
- %tmp0 = addrspacecast i32 addrspace(3)* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32 addrspace(3)* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store volatile i32 %val, i32 addrspace(4)* %tmp1, align 4
+ %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store volatile i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_store_flat_to_private(
-; CHECK: load i32, i32*
-; CHECK: store volatile i32 %val, i32 addrspace(4)*
-define amdgpu_kernel void @volatile_store_flat_to_private(i32* nocapture %input, i32* nocapture %output) #0 {
- %tmp0 = addrspacecast i32* %input to i32 addrspace(4)*
- %tmp1 = addrspacecast i32* %output to i32 addrspace(4)*
- %val = load i32, i32 addrspace(4)* %tmp0, align 4
- store volatile i32 %val, i32 addrspace(4)* %tmp1, align 4
+; CHECK: load i32, i32 addrspace(5)*
+; CHECK: store volatile i32 %val, i32*
+define amdgpu_kernel void @volatile_store_flat_to_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
+ %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
+ %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
+ %val = load i32, i32* %tmp0, align 4
+ store volatile i32 %val, i32* %tmp1, align 4
ret void
}
; CHECK-LABEL: @volatile_atomicrmw_add_group_to_flat(
-; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
-; CHECK: atomicrmw volatile add i32 addrspace(4)*
+; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32*
+; CHECK: atomicrmw volatile add i32*
define i32 @volatile_atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = atomicrmw volatile add i32 addrspace(4)* %cast, i32 %y seq_cst
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = atomicrmw volatile add i32* %cast, i32 %y seq_cst
ret i32 %ret
}
; CHECK-LABEL: @volatile_atomicrmw_add_global_to_flat(
-; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
-; CHECK: %ret = atomicrmw volatile add i32 addrspace(4)*
+; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32*
+; CHECK: %ret = atomicrmw volatile add i32*
define i32 @volatile_atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = atomicrmw volatile add i32 addrspace(4)* %cast, i32 %y seq_cst
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = atomicrmw volatile add i32* %cast, i32 %y seq_cst
ret i32 %ret
}
; CHECK-LABEL: @volatile_cmpxchg_global_to_flat(
-; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
-; CHECK: cmpxchg volatile i32 addrspace(4)*
+; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32*
+; CHECK: cmpxchg volatile i32*
define { i32, i1 } @volatile_cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
- %ret = cmpxchg volatile i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+ %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
+ %ret = cmpxchg volatile i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
ret { i32, i1 } %ret
}
; CHECK-LABEL: @volatile_cmpxchg_group_to_flat(
-; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
-; CHECK: cmpxchg volatile i32 addrspace(4)*
+; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32*
+; CHECK: cmpxchg volatile i32*
define { i32, i1 } @volatile_cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
- %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
- %ret = cmpxchg volatile i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+ %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
+ %ret = cmpxchg volatile i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
ret { i32, i1 } %ret
}
; FIXME: Shouldn't be losing names
; CHECK-LABEL: @volatile_memset_group_to_flat(
-; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
-; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %1, i8 4, i64 32, i1 true)
+; CHECK: addrspacecast i8 addrspace(3)* %group.ptr to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_group_to_flat(i8 addrspace(3)* %group.ptr, i32 %y) #0 {
- %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 32, i1 true)
+ %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
ret void
}
; CHECK-LABEL: @volatile_memset_global_to_flat(
-; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
-; CHECK: call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %1, i8 4, i64 32, i1 true)
+; CHECK: addrspacecast i8 addrspace(1)* %global.ptr to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 %1, i8 4, i64 32, i1 true)
define amdgpu_kernel void @volatile_memset_global_to_flat(i8 addrspace(1)* %global.ptr, i32 %y) #0 {
- %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8 addrspace(4)*
- call void @llvm.memset.p4i8.i64(i8 addrspace(4)* align 4 %cast, i8 4, i64 32, i1 true)
+ %cast = addrspacecast i8 addrspace(1)* %global.ptr to i8*
+ call void @llvm.memset.p0i8.i64(i8* align 4 %cast, i8 4, i64 32, i1 true)
ret void
}
-declare void @llvm.memset.p4i8.i64(i8 addrspace(4)* nocapture writeonly, i8, i64, i1) #1
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind }
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll
index 368dc6ab361..87acb1057af 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/adjust-alloca-alignment.ll
@@ -1,38 +1,37 @@
-; RUN: opt -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
-; RUN: opt -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
+; RUN: opt -data-layout=A5 -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
+; RUN: opt -data-layout=A5 -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"
; ALL-LABEL: @load_unknown_offset_align1_i8(
; ALL: alloca [128 x i8], align 1
-; UNALIGNED: load <2 x i8>, <2 x i8>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: load <2 x i8>, <2 x i8> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: load i8, i8* %ptr0, align 1{{$}}
-; ALIGNED: load i8, i8* %ptr1, align 1{{$}}
+; ALIGNED: load i8, i8 addrspace(5)* %ptr0, align 1{{$}}
+; ALIGNED: load i8, i8 addrspace(5)* %ptr1, align 1{{$}}
define amdgpu_kernel void @load_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i8], align 1
- %ptr0 = getelementptr inbounds [128 x i8], [128 x i8]* %alloca, i32 0, i32 %offset
- %val0 = load i8, i8* %ptr0, align 1
- %ptr1 = getelementptr inbounds i8, i8* %ptr0, i32 1
- %val1 = load i8, i8* %ptr1, align 1
+ %alloca = alloca [128 x i8], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i8], [128 x i8] addrspace(5)* %alloca, i32 0, i32 %offset
+ %val0 = load i8, i8 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i8, i8 addrspace(5)* %ptr0, i32 1
+ %val1 = load i8, i8 addrspace(5)* %ptr1, align 1
%add = add i8 %val0, %val1
store i8 %add, i8 addrspace(1)* %out
ret void
}
; ALL-LABEL: @load_unknown_offset_align1_i16(
-; ALL: alloca [128 x i16], align 1{{$}}
-; UNALIGNED: load <2 x i16>, <2 x i16>* %{{[0-9]+}}, align 1{{$}}
+; ALL: alloca [128 x i16], align 1, addrspace(5){{$}}
+; UNALIGNED: load <2 x i16>, <2 x i16> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: load i16, i16* %ptr0, align 1{{$}}
-; ALIGNED: load i16, i16* %ptr1, align 1{{$}}
+; ALIGNED: load i16, i16 addrspace(5)* %ptr0, align 1{{$}}
+; ALIGNED: load i16, i16 addrspace(5)* %ptr1, align 1{{$}}
define amdgpu_kernel void @load_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i16], align 1
- %ptr0 = getelementptr inbounds [128 x i16], [128 x i16]* %alloca, i32 0, i32 %offset
- %val0 = load i16, i16* %ptr0, align 1
- %ptr1 = getelementptr inbounds i16, i16* %ptr0, i32 1
- %val1 = load i16, i16* %ptr1, align 1
+ %alloca = alloca [128 x i16], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i16], [128 x i16] addrspace(5)* %alloca, i32 0, i32 %offset
+ %val0 = load i16, i16 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i16, i16 addrspace(5)* %ptr0, i32 1
+ %val1 = load i16, i16 addrspace(5)* %ptr1, align 1
%add = add i16 %val0, %val1
store i16 %add, i16 addrspace(1)* %out
ret void
@@ -43,16 +42,16 @@ define amdgpu_kernel void @load_unknown_offset_align1_i16(i16 addrspace(1)* noal
; ALL-LABEL: @load_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 1
-; UNALIGNED: load <2 x i32>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: load <2 x i32>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: load i32, i32* %ptr0, align 1
-; ALIGNED: load i32, i32* %ptr1, align 1
+; ALIGNED: load i32, i32 addrspace(5)* %ptr0, align 1
+; ALIGNED: load i32, i32 addrspace(5)* %ptr1, align 1
define amdgpu_kernel void @load_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i32], align 1
- %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
- %val0 = load i32, i32* %ptr0, align 1
- %ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
- %val1 = load i32, i32* %ptr1, align 1
+ %alloca = alloca [128 x i32], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca, i32 0, i32 %offset
+ %val0 = load i32, i32 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %ptr0, i32 1
+ %val1 = load i32, i32 addrspace(5)* %ptr1, align 1
%add = add i32 %val0, %val1
store i32 %add, i32 addrspace(1)* %out
ret void
@@ -63,17 +62,17 @@ define amdgpu_kernel void @load_unknown_offset_align1_i32(i32 addrspace(1)* noal
; ALL-LABEL: @load_alloca16_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 16
-; UNALIGNED: load <2 x i32>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: load <2 x i32>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
; FIXME: Should change alignment
; ALIGNED: load i32
; ALIGNED: load i32
define amdgpu_kernel void @load_alloca16_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i32], align 16
- %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
- %val0 = load i32, i32* %ptr0, align 1
- %ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
- %val1 = load i32, i32* %ptr1, align 1
+ %alloca = alloca [128 x i32], align 16, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca, i32 0, i32 %offset
+ %val0 = load i32, i32 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %ptr0, i32 1
+ %val1 = load i32, i32 addrspace(5)* %ptr1, align 1
%add = add i32 %val0, %val1
store i32 %add, i32 addrspace(1)* %out
ret void
@@ -81,31 +80,31 @@ define amdgpu_kernel void @load_alloca16_unknown_offset_align1_i32(i32 addrspace
; ALL-LABEL: @store_unknown_offset_align1_i8(
; ALL: alloca [128 x i8], align 1
-; UNALIGNED: store <2 x i8> <i8 9, i8 10>, <2 x i8>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: store <2 x i8> <i8 9, i8 10>, <2 x i8> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: store i8 9, i8* %ptr0, align 1{{$}}
-; ALIGNED: store i8 10, i8* %ptr1, align 1{{$}}
+; ALIGNED: store i8 9, i8 addrspace(5)* %ptr0, align 1{{$}}
+; ALIGNED: store i8 10, i8 addrspace(5)* %ptr1, align 1{{$}}
define amdgpu_kernel void @store_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i8], align 1
- %ptr0 = getelementptr inbounds [128 x i8], [128 x i8]* %alloca, i32 0, i32 %offset
- store i8 9, i8* %ptr0, align 1
- %ptr1 = getelementptr inbounds i8, i8* %ptr0, i32 1
- store i8 10, i8* %ptr1, align 1
+ %alloca = alloca [128 x i8], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i8], [128 x i8] addrspace(5)* %alloca, i32 0, i32 %offset
+ store i8 9, i8 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i8, i8 addrspace(5)* %ptr0, i32 1
+ store i8 10, i8 addrspace(5)* %ptr1, align 1
ret void
}
; ALL-LABEL: @store_unknown_offset_align1_i16(
; ALL: alloca [128 x i16], align 1
-; UNALIGNED: store <2 x i16> <i16 9, i16 10>, <2 x i16>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: store <2 x i16> <i16 9, i16 10>, <2 x i16> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: store i16 9, i16* %ptr0, align 1{{$}}
-; ALIGNED: store i16 10, i16* %ptr1, align 1{{$}}
+; ALIGNED: store i16 9, i16 addrspace(5)* %ptr0, align 1{{$}}
+; ALIGNED: store i16 10, i16 addrspace(5)* %ptr1, align 1{{$}}
define amdgpu_kernel void @store_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i16], align 1
- %ptr0 = getelementptr inbounds [128 x i16], [128 x i16]* %alloca, i32 0, i32 %offset
- store i16 9, i16* %ptr0, align 1
- %ptr1 = getelementptr inbounds i16, i16* %ptr0, i32 1
- store i16 10, i16* %ptr1, align 1
+ %alloca = alloca [128 x i16], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i16], [128 x i16] addrspace(5)* %alloca, i32 0, i32 %offset
+ store i16 9, i16 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i16, i16 addrspace(5)* %ptr0, i32 1
+ store i16 10, i16 addrspace(5)* %ptr1, align 1
ret void
}
@@ -115,16 +114,16 @@ define amdgpu_kernel void @store_unknown_offset_align1_i16(i16 addrspace(1)* noa
; ALL-LABEL: @store_unknown_offset_align1_i32(
; ALL: alloca [128 x i32], align 1
-; UNALIGNED: store <2 x i32> <i32 9, i32 10>, <2 x i32>* %{{[0-9]+}}, align 1{{$}}
+; UNALIGNED: store <2 x i32> <i32 9, i32 10>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
-; ALIGNED: store i32 9, i32* %ptr0, align 1
-; ALIGNED: store i32 10, i32* %ptr1, align 1
+; ALIGNED: store i32 9, i32 addrspace(5)* %ptr0, align 1
+; ALIGNED: store i32 10, i32 addrspace(5)* %ptr1, align 1
define amdgpu_kernel void @store_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
- %alloca = alloca [128 x i32], align 1
- %ptr0 = getelementptr inbounds [128 x i32], [128 x i32]* %alloca, i32 0, i32 %offset
- store i32 9, i32* %ptr0, align 1
- %ptr1 = getelementptr inbounds i32, i32* %ptr0, i32 1
- store i32 10, i32* %ptr1, align 1
+ %alloca = alloca [128 x i32], align 1, addrspace(5)
+ %ptr0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca, i32 0, i32 %offset
+ store i32 9, i32 addrspace(5)* %ptr0, align 1
+ %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %ptr0, i32 1
+ store i32 10, i32 addrspace(5)* %ptr1, align 1
ret void
}
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
index 0fcdc7b9083..43352783d10 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
@@ -5,7 +5,6 @@
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-8,+unaligned-scratch-access -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=ELT8,ELT8-UNALIGNED,UNALIGNED,ALL %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-16,+unaligned-scratch-access -load-store-vectorizer -S -o - %s | FileCheck -check-prefixes=ELT16,ELT16-UNALIGNED,UNALIGNED,ALL %s
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32
; ALIGNED: store i32
@@ -17,52 +16,52 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:
; ELT8-UNALIGNED: store <2 x i32>
; ELT16-UNALIGNED: store <4 x i32>
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
- %out.gep.3 = getelementptr i32, i32* %out, i32 3
-
- store i32 9, i32* %out
- store i32 1, i32* %out.gep.1
- store i32 23, i32* %out.gep.2
- store i32 19, i32* %out.gep.3
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(5)* %out, i32 3
+
+ store i32 9, i32 addrspace(5)* %out
+ store i32 1, i32 addrspace(5)* %out.gep.1
+ store i32 23, i32 addrspace(5)* %out.gep.2
+ store i32 19, i32 addrspace(5)* %out.gep.3
ret void
}
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32_align1(
-; ALIGNED: store i32 9, i32* %out, align 1
-; ALIGNED: store i32 1, i32* %out.gep.1, align 1
-; ALIGNED: store i32 23, i32* %out.gep.2, align 1
-; ALIGNED: store i32 19, i32* %out.gep.3, align 1
+; ALIGNED: store i32 9, i32 addrspace(5)* %out, align 1
+; ALIGNED: store i32 1, i32 addrspace(5)* %out.gep.1, align 1
+; ALIGNED: store i32 23, i32 addrspace(5)* %out.gep.2, align 1
+; ALIGNED: store i32 19, i32 addrspace(5)* %out.gep.3, align 1
-; ELT16-UNALIGNED: store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, <4 x i32>* %1, align 1
+; ELT16-UNALIGNED: store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, <4 x i32> addrspace(5)* %1, align 1
-; ELT8-UNALIGNED: store <2 x i32> <i32 9, i32 1>, <2 x i32>* %1, align 1
-; ELT8-UNALIGNED: store <2 x i32> <i32 23, i32 19>, <2 x i32>* %2, align 1
+; ELT8-UNALIGNED: store <2 x i32> <i32 9, i32 1>, <2 x i32> addrspace(5)* %1, align 1
+; ELT8-UNALIGNED: store <2 x i32> <i32 23, i32 19>, <2 x i32> addrspace(5)* %2, align 1
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align1(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
- %out.gep.3 = getelementptr i32, i32* %out, i32 3
-
- store i32 9, i32* %out, align 1
- store i32 1, i32* %out.gep.1, align 1
- store i32 23, i32* %out.gep.2, align 1
- store i32 19, i32* %out.gep.3, align 1
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align1(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(5)* %out, i32 3
+
+ store i32 9, i32 addrspace(5)* %out, align 1
+ store i32 1, i32 addrspace(5)* %out.gep.1, align 1
+ store i32 23, i32 addrspace(5)* %out.gep.2, align 1
+ store i32 19, i32 addrspace(5)* %out.gep.3, align 1
ret void
}
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32_align2(
-; ALIGNED: store i32 9, i32* %out, align 2
-; ALIGNED: store i32 1, i32* %out.gep.1, align 2
-; ALIGNED: store i32 23, i32* %out.gep.2, align 2
-; ALIGNED: store i32 19, i32* %out.gep.3, align 2
+; ALIGNED: store i32 9, i32 addrspace(5)* %out, align 2
+; ALIGNED: store i32 1, i32 addrspace(5)* %out.gep.1, align 2
+; ALIGNED: store i32 23, i32 addrspace(5)* %out.gep.2, align 2
+; ALIGNED: store i32 19, i32 addrspace(5)* %out.gep.3, align 2
-; ELT16-UNALIGNED: store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, <4 x i32>* %1, align 2
+; ELT16-UNALIGNED: store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, <4 x i32> addrspace(5)* %1, align 2
; ELT8-UNALIGNED: store <2 x i32>
; ELT8-UNALIGNED: store <2 x i32>
@@ -71,29 +70,29 @@ define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align1(
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
; ELT4-UNALIGNED: store i32
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align2(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
- %out.gep.3 = getelementptr i32, i32* %out, i32 3
-
- store i32 9, i32* %out, align 2
- store i32 1, i32* %out.gep.1, align 2
- store i32 23, i32* %out.gep.2, align 2
- store i32 19, i32* %out.gep.3, align 2
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32_align2(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(5)* %out, i32 3
+
+ store i32 9, i32 addrspace(5)* %out, align 2
+ store i32 1, i32 addrspace(5)* %out.gep.1, align 2
+ store i32 23, i32 addrspace(5)* %out.gep.2, align 2
+ store i32 19, i32 addrspace(5)* %out.gep.3, align 2
ret void
}
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i8(
; ALL: store <4 x i8>
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8(i8* %out) #0 {
- %out.gep.1 = getelementptr i8, i8* %out, i32 1
- %out.gep.2 = getelementptr i8, i8* %out, i32 2
- %out.gep.3 = getelementptr i8, i8* %out, i32 3
-
- store i8 9, i8* %out, align 4
- store i8 1, i8* %out.gep.1
- store i8 23, i8* %out.gep.2
- store i8 19, i8* %out.gep.3
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8(i8 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i8, i8 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i8, i8 addrspace(5)* %out, i32 3
+
+ store i8 9, i8 addrspace(5)* %out, align 4
+ store i8 1, i8 addrspace(5)* %out.gep.1
+ store i8 23, i8 addrspace(5)* %out.gep.2
+ store i8 19, i8 addrspace(5)* %out.gep.3
ret void
}
@@ -103,26 +102,26 @@ define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8(i8* %out
; ALIGNED: store i8
; ALIGNED: store i8
-; UNALIGNED: store <4 x i8> <i8 9, i8 1, i8 23, i8 19>, <4 x i8>* %1, align 1
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8_align1(i8* %out) #0 {
- %out.gep.1 = getelementptr i8, i8* %out, i32 1
- %out.gep.2 = getelementptr i8, i8* %out, i32 2
- %out.gep.3 = getelementptr i8, i8* %out, i32 3
+; UNALIGNED: store <4 x i8> <i8 9, i8 1, i8 23, i8 19>, <4 x i8> addrspace(5)* %1, align 1
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8_align1(i8 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i8, i8 addrspace(5)* %out, i32 2
+ %out.gep.3 = getelementptr i8, i8 addrspace(5)* %out, i32 3
- store i8 9, i8* %out, align 1
- store i8 1, i8* %out.gep.1, align 1
- store i8 23, i8* %out.gep.2, align 1
- store i8 19, i8* %out.gep.3, align 1
+ store i8 9, i8 addrspace(5)* %out, align 1
+ store i8 1, i8 addrspace(5)* %out.gep.1, align 1
+ store i8 23, i8 addrspace(5)* %out.gep.2, align 1
+ store i8 19, i8 addrspace(5)* %out.gep.3, align 1
ret void
}
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v2i16(
; ALL: store <2 x i16>
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16(i16* %out) #0 {
- %out.gep.1 = getelementptr i16, i16* %out, i32 1
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16(i16 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(5)* %out, i32 1
- store i16 9, i16* %out, align 4
- store i16 12, i16* %out.gep.1
+ store i16 9, i16 addrspace(5)* %out, align 4
+ store i16 12, i16 addrspace(5)* %out.gep.1
ret void
}
@@ -130,12 +129,12 @@ define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16(i16* %o
; ALIGNED: store i16
; ALIGNED: store i16
-; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 2
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align2(i16* %out) #0 {
- %out.gep.1 = getelementptr i16, i16* %out, i32 1
+; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16> addrspace(5)* %1, align 2
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align2(i16 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(5)* %out, i32 1
- store i16 9, i16* %out, align 2
- store i16 12, i16* %out.gep.1, align 2
+ store i16 9, i16 addrspace(5)* %out, align 2
+ store i16 12, i16 addrspace(5)* %out.gep.1, align 2
ret void
}
@@ -143,22 +142,22 @@ define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align2(
; ALIGNED: store i16
; ALIGNED: store i16
-; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 1
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align1(i16* %out) #0 {
- %out.gep.1 = getelementptr i16, i16* %out, i32 1
+; UNALIGNED: store <2 x i16> <i16 9, i16 12>, <2 x i16> addrspace(5)* %1, align 1
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align1(i16 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(5)* %out, i32 1
- store i16 9, i16* %out, align 1
- store i16 12, i16* %out.gep.1, align 1
+ store i16 9, i16 addrspace(5)* %out, align 1
+ store i16 12, i16 addrspace(5)* %out.gep.1, align 1
ret void
}
; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v2i16_align8(
-; ALL: store <2 x i16> <i16 9, i16 12>, <2 x i16>* %1, align 8
-define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align8(i16* %out) #0 {
- %out.gep.1 = getelementptr i16, i16* %out, i32 1
+; ALL: store <2 x i16> <i16 9, i16 12>, <2 x i16> addrspace(5)* %1, align 8
+define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align8(i16 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16 addrspace(5)* %out, i32 1
- store i16 9, i16* %out, align 8
- store i16 12, i16* %out.gep.1, align 2
+ store i16 9, i16 addrspace(5)* %out, align 8
+ store i16 12, i16 addrspace(5)* %out.gep.1, align 2
ret void
}
@@ -179,13 +178,13 @@ define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v2i16_align8(
; ELT16-ALIGNED: store i32
; ELT16-UNALIGNED: store <3 x i32>
-define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
+define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
- store i32 9, i32* %out
- store i32 1, i32* %out.gep.1
- store i32 23, i32* %out.gep.2
+ store i32 9, i32 addrspace(5)* %out
+ store i32 1, i32 addrspace(5)* %out.gep.1
+ store i32 23, i32 addrspace(5)* %out.gep.2
ret void
}
@@ -202,13 +201,13 @@ define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32(i32* %o
; ELT8-UNALIGNED: store i32
; ELT16-UNALIGNED: store <3 x i32>
-define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32_align1(i32* %out) #0 {
- %out.gep.1 = getelementptr i32, i32* %out, i32 1
- %out.gep.2 = getelementptr i32, i32* %out, i32 2
+define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32_align1(i32 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(5)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(5)* %out, i32 2
- store i32 9, i32* %out, align 1
- store i32 1, i32* %out.gep.1, align 1
- store i32 23, i32* %out.gep.2, align 1
+ store i32 9, i32 addrspace(5)* %out, align 1
+ store i32 1, i32 addrspace(5)* %out.gep.1, align 1
+ store i32 23, i32 addrspace(5)* %out.gep.2, align 1
ret void
}
@@ -218,13 +217,13 @@ define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i32_align1(
; ALIGNED: store i8
; UNALIGNED: store <3 x i8>
-define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i8_align1(i8* %out) #0 {
- %out.gep.1 = getelementptr i8, i8* %out, i8 1
- %out.gep.2 = getelementptr i8, i8* %out, i8 2
+define amdgpu_kernel void @merge_private_store_3_vector_elts_loads_v4i8_align1(i8 addrspace(5)* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8 addrspace(5)* %out, i8 1
+ %out.gep.2 = getelementptr i8, i8 addrspace(5)* %out, i8 2
- store i8 9, i8* %out, align 1
- store i8 1, i8* %out.gep.1, align 1
- store i8 23, i8* %out.gep.2, align 1
+ store i8 9, i8 addrspace(5)* %out, align 1
+ store i8 1, i8 addrspace(5)* %out.gep.1, align 1
+ store i8 23, i8 addrspace(5)* %out.gep.2, align 1
ret void
}
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll
index 02c3c05e794..d558aa24304 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AMDGPU/different-addrspace-crash.ll
@@ -1,6 +1,5 @@
; RUN: llc < %s | FileCheck %s
-target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"
; We need to compile this for a target where we have different address spaces,
@@ -21,9 +20,9 @@ entry:
loop:
%idx0 = phi i32 [ %next_idx0, %loop ], [ 0, %entry ]
- %0 = getelementptr inbounds i32, i32* null, i32 %idx0
+ %0 = getelementptr inbounds i32, i32 addrspace(5)* null, i32 %idx0
%1 = getelementptr inbounds i32, i32 addrspace(1)* null, i32 %idx0
- store i32 1, i32* %0
+ store i32 1, i32 addrspace(5)* %0
store i32 7, i32 addrspace(1)* %1
%next_idx0 = add nuw nsw i32 %idx0, 1
br label %loop
diff --git a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
index e986c3dc2a2..362001fff92 100644
--- a/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
+++ b/llvm/test/Transforms/LoopUnroll/AMDGPU/unroll-for-private.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S -amdgpu-unroll-threshold-private=20000 %s | FileCheck %s
+; RUN: opt -data-layout=A5 -mtriple=amdgcn-unknown-amdhsa -loop-unroll -S -amdgpu-unroll-threshold-private=20000 %s | FileCheck %s
; Check that we full unroll loop to be able to eliminate alloca
; CHECK-LABEL: @non_invariant_ind
@@ -9,13 +9,13 @@
define amdgpu_kernel void @non_invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) {
entry:
- %arr = alloca [64 x i32], align 4
+ %arr = alloca [64 x i32], align 4, addrspace(5)
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.body
for.cond.cleanup: ; preds = %for.body
- %arrayidx5 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %x
- %tmp15 = load i32, i32* %arrayidx5, align 4
+ %arrayidx5 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %x
+ %tmp15 = load i32, i32 addrspace(5)* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
ret void
@@ -27,8 +27,8 @@ for.body: ; preds = %for.body, %entry
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
%add = add nsw i32 %i.015, %tmp1
%rem = srem i32 %add, 64
- %arrayidx3 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %rem
- store i32 %tmp16, i32* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %rem
+ store i32 %tmp16, i32 addrspace(5)* %arrayidx3, align 4
%inc = add nuw nsw i32 %i.015, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -42,7 +42,7 @@ for.body: ; preds = %for.body, %entry
define amdgpu_kernel void @invariant_ind(i32 addrspace(1)* nocapture %a, i32 %x) {
entry:
- %arr = alloca [64 x i32], align 4
+ %arr = alloca [64 x i32], align 4, addrspace(5)
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.cond2.preheader
@@ -54,8 +54,8 @@ for.cond2.preheader: ; preds = %for.cond.cleanup5,
br label %for.body6
for.cond.cleanup: ; preds = %for.cond.cleanup5
- %arrayidx13 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %x
- %tmp16 = load i32, i32* %arrayidx13, align 4
+ %arrayidx13 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %x
+ %tmp16 = load i32, i32 addrspace(5)* %arrayidx13, align 4
%arrayidx15 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp16, i32 addrspace(1)* %arrayidx15, align 4
ret void
@@ -69,8 +69,8 @@ for.body6: ; preds = %for.body6, %for.con
%j.025 = phi i32 [ 0, %for.cond2.preheader ], [ %inc, %for.body6 ]
%add = add nsw i32 %j.025, %tmp1
%rem = srem i32 %add, 64
- %arrayidx8 = getelementptr inbounds [64 x i32], [64 x i32]* %arr, i32 0, i32 %rem
- store i32 %tmp15, i32* %arrayidx8, align 4
+ %arrayidx8 = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %arr, i32 0, i32 %rem
+ store i32 %tmp15, i32 addrspace(5)* %arrayidx8, align 4
%inc = add nuw nsw i32 %j.025, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup5, label %for.body6
@@ -84,13 +84,13 @@ for.body6: ; preds = %for.body6, %for.con
define amdgpu_kernel void @too_big(i32 addrspace(1)* nocapture %a, i32 %x) {
entry:
- %arr = alloca [256 x i32], align 4
+ %arr = alloca [256 x i32], align 4, addrspace(5)
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.body
for.cond.cleanup: ; preds = %for.body
- %arrayidx5 = getelementptr inbounds [256 x i32], [256 x i32]* %arr, i32 0, i32 %x
- %tmp15 = load i32, i32* %arrayidx5, align 4
+ %arrayidx5 = getelementptr inbounds [256 x i32], [256 x i32] addrspace(5)* %arr, i32 0, i32 %x
+ %tmp15 = load i32, i32 addrspace(5)* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
ret void
@@ -102,8 +102,8 @@ for.body: ; preds = %for.body, %entry
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
%add = add nsw i32 %i.015, %tmp1
%rem = srem i32 %add, 64
- %arrayidx3 = getelementptr inbounds [256 x i32], [256 x i32]* %arr, i32 0, i32 %rem
- store i32 %tmp16, i32* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds [256 x i32], [256 x i32] addrspace(5)* %arr, i32 0, i32 %rem
+ store i32 %tmp16, i32 addrspace(5)* %arrayidx3, align 4
%inc = add nuw nsw i32 %i.015, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -118,13 +118,13 @@ for.body: ; preds = %for.body, %entry
define amdgpu_kernel void @dynamic_size_alloca(i32 addrspace(1)* nocapture %a, i32 %n, i32 %x) {
entry:
- %arr = alloca i32, i32 %n, align 4
+ %arr = alloca i32, i32 %n, align 4, addrspace(5)
%tmp1 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
br label %for.body
for.cond.cleanup: ; preds = %for.body
- %arrayidx5 = getelementptr inbounds i32, i32* %arr, i32 %x
- %tmp15 = load i32, i32* %arrayidx5, align 4
+ %arrayidx5 = getelementptr inbounds i32, i32 addrspace(5)* %arr, i32 %x
+ %tmp15 = load i32, i32 addrspace(5)* %arrayidx5, align 4
%arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 %tmp1
store i32 %tmp15, i32 addrspace(1)* %arrayidx7, align 4
ret void
@@ -136,8 +136,8 @@ for.body: ; preds = %for.body, %entry
%tmp16 = load i32, i32 addrspace(1)* %arrayidx, align 4
%add = add nsw i32 %i.015, %tmp1
%rem = srem i32 %add, 64
- %arrayidx3 = getelementptr inbounds i32, i32* %arr, i32 %rem
- store i32 %tmp16, i32* %arrayidx3, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32 addrspace(5)* %arr, i32 %rem
+ store i32 %tmp16, i32 addrspace(5)* %arrayidx3, align 4
%inc = add nuw nsw i32 %i.015, 1
%exitcond = icmp eq i32 %inc, 100
br i1 %exitcond, label %for.cond.cleanup, label %for.body