summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/AMDGPU
diff options
context:
space:
mode:
author: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> 2016-09-06 20:22:28 +0000
committer: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> 2016-09-06 20:22:28 +0000
commit: 1d65026ca6844f7401e3aa4a9f3f0d3e1ed9f406 (patch)
tree: 45da483f298e8c48c9c8d6d94a342347cfcc446b /llvm/test/CodeGen/AMDGPU
parent: ec73f5dacf1da07eb60321f843cd0d6bbbf9e1bc (diff)
download: bcm5719-llvm-1d65026ca6844f7401e3aa4a9f3f0d3e1ed9f406.tar.gz
bcm5719-llvm-1d65026ca6844f7401e3aa4a9f3f0d3e1ed9f406.zip
[AMDGPU] Wave and register controls
- Implemented amdgpu-flat-work-group-size attribute
- Implemented amdgpu-num-active-waves-per-eu attribute
- Implemented amdgpu-num-sgpr attribute
- Implemented amdgpu-num-vgpr attribute
- Dynamic LDS constraints are in a separate patch

Patch by Tom Stellard and Konstantin Zhuravlyov

Differential Revision: https://reviews.llvm.org/D21562

llvm-svn: 280747
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll | 129
-rw-r--r-- llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll | 17
-rw-r--r-- llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-vgpr.ll | 75
-rw-r--r-- llvm/test/CodeGen/AMDGPU/attr-unparseable.ll | 57
-rw-r--r-- llvm/test/CodeGen/AMDGPU/indirect-private-64.ll | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll | 14
-rw-r--r-- llvm/test/CodeGen/AMDGPU/large-work-group-registers.ll | 41
-rw-r--r-- llvm/test/CodeGen/AMDGPU/load-constant-i16.ll | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/private-memory-r600.ll | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll | 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/target-cpu.ll | 4
18 files changed, 299 insertions, 62 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
index bd0817d3041..d2de0c8a169 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll
@@ -545,7 +545,7 @@ entry:
ret void
}
-attributes #0 = { nounwind "amdgpu-max-waves-per-eu"="2" }
+attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,2" }
; HSAOPT: !0 = !{}
; HSAOPT: !1 = !{i32 0, i32 2048}
diff --git a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
index b00fff0a6f9..f190bd0cb01 100644
--- a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
@@ -47,6 +47,6 @@ define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 add
ret void
}
-attributes #0 = { nounwind "amdgpu-max-waves-per-eu"="1" }
+attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll
new file mode 100644
index 00000000000..6b419400615
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size.ll
@@ -0,0 +1,129 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}min_64_max_64:
+; CHECK: SGPRBlocks: 0
+; CHECK: VGPRBlocks: 0
+; CHECK: NumSGPRsForWavesPerEU: 1
+; CHECK: NumVGPRsForWavesPerEU: 1
+define void @min_64_max_64() #0 {
+entry:
+ ret void
+}
+attributes #0 = {"amdgpu-flat-work-group-size"="64,64"}
+
+; CHECK-LABEL: {{^}}min_64_max_128:
+; CHECK: SGPRBlocks: 0
+; CHECK: VGPRBlocks: 0
+; CHECK: NumSGPRsForWavesPerEU: 1
+; CHECK: NumVGPRsForWavesPerEU: 1
+define void @min_64_max_128() #1 {
+entry:
+ ret void
+}
+attributes #1 = {"amdgpu-flat-work-group-size"="64,128"}
+
+; CHECK-LABEL: {{^}}min_128_max_128:
+; CHECK: SGPRBlocks: 0
+; CHECK: VGPRBlocks: 0
+; CHECK: NumSGPRsForWavesPerEU: 1
+; CHECK: NumVGPRsForWavesPerEU: 1
+define void @min_128_max_128() #2 {
+entry:
+ ret void
+}
+attributes #2 = {"amdgpu-flat-work-group-size"="128,128"}
+
+; CHECK-LABEL: {{^}}min_1024_max_2048
+; CHECK: SGPRBlocks: 2
+; CHECK: VGPRBlocks: 7
+; CHECK: NumSGPRsForWavesPerEU: 19
+; CHECK: NumVGPRsForWavesPerEU: 32
+@var = addrspace(1) global float 0.0
+define void @min_1024_max_2048() #3 {
+ %val0 = load volatile float, float addrspace(1)* @var
+ %val1 = load volatile float, float addrspace(1)* @var
+ %val2 = load volatile float, float addrspace(1)* @var
+ %val3 = load volatile float, float addrspace(1)* @var
+ %val4 = load volatile float, float addrspace(1)* @var
+ %val5 = load volatile float, float addrspace(1)* @var
+ %val6 = load volatile float, float addrspace(1)* @var
+ %val7 = load volatile float, float addrspace(1)* @var
+ %val8 = load volatile float, float addrspace(1)* @var
+ %val9 = load volatile float, float addrspace(1)* @var
+ %val10 = load volatile float, float addrspace(1)* @var
+ %val11 = load volatile float, float addrspace(1)* @var
+ %val12 = load volatile float, float addrspace(1)* @var
+ %val13 = load volatile float, float addrspace(1)* @var
+ %val14 = load volatile float, float addrspace(1)* @var
+ %val15 = load volatile float, float addrspace(1)* @var
+ %val16 = load volatile float, float addrspace(1)* @var
+ %val17 = load volatile float, float addrspace(1)* @var
+ %val18 = load volatile float, float addrspace(1)* @var
+ %val19 = load volatile float, float addrspace(1)* @var
+ %val20 = load volatile float, float addrspace(1)* @var
+ %val21 = load volatile float, float addrspace(1)* @var
+ %val22 = load volatile float, float addrspace(1)* @var
+ %val23 = load volatile float, float addrspace(1)* @var
+ %val24 = load volatile float, float addrspace(1)* @var
+ %val25 = load volatile float, float addrspace(1)* @var
+ %val26 = load volatile float, float addrspace(1)* @var
+ %val27 = load volatile float, float addrspace(1)* @var
+ %val28 = load volatile float, float addrspace(1)* @var
+ %val29 = load volatile float, float addrspace(1)* @var
+ %val30 = load volatile float, float addrspace(1)* @var
+ %val31 = load volatile float, float addrspace(1)* @var
+ %val32 = load volatile float, float addrspace(1)* @var
+ %val33 = load volatile float, float addrspace(1)* @var
+ %val34 = load volatile float, float addrspace(1)* @var
+ %val35 = load volatile float, float addrspace(1)* @var
+ %val36 = load volatile float, float addrspace(1)* @var
+ %val37 = load volatile float, float addrspace(1)* @var
+ %val38 = load volatile float, float addrspace(1)* @var
+ %val39 = load volatile float, float addrspace(1)* @var
+ %val40 = load volatile float, float addrspace(1)* @var
+
+ store volatile float %val0, float addrspace(1)* @var
+ store volatile float %val1, float addrspace(1)* @var
+ store volatile float %val2, float addrspace(1)* @var
+ store volatile float %val3, float addrspace(1)* @var
+ store volatile float %val4, float addrspace(1)* @var
+ store volatile float %val5, float addrspace(1)* @var
+ store volatile float %val6, float addrspace(1)* @var
+ store volatile float %val7, float addrspace(1)* @var
+ store volatile float %val8, float addrspace(1)* @var
+ store volatile float %val9, float addrspace(1)* @var
+ store volatile float %val10, float addrspace(1)* @var
+ store volatile float %val11, float addrspace(1)* @var
+ store volatile float %val12, float addrspace(1)* @var
+ store volatile float %val13, float addrspace(1)* @var
+ store volatile float %val14, float addrspace(1)* @var
+ store volatile float %val15, float addrspace(1)* @var
+ store volatile float %val16, float addrspace(1)* @var
+ store volatile float %val17, float addrspace(1)* @var
+ store volatile float %val18, float addrspace(1)* @var
+ store volatile float %val19, float addrspace(1)* @var
+ store volatile float %val20, float addrspace(1)* @var
+ store volatile float %val21, float addrspace(1)* @var
+ store volatile float %val22, float addrspace(1)* @var
+ store volatile float %val23, float addrspace(1)* @var
+ store volatile float %val24, float addrspace(1)* @var
+ store volatile float %val25, float addrspace(1)* @var
+ store volatile float %val26, float addrspace(1)* @var
+ store volatile float %val27, float addrspace(1)* @var
+ store volatile float %val28, float addrspace(1)* @var
+ store volatile float %val29, float addrspace(1)* @var
+ store volatile float %val30, float addrspace(1)* @var
+ store volatile float %val31, float addrspace(1)* @var
+ store volatile float %val32, float addrspace(1)* @var
+ store volatile float %val33, float addrspace(1)* @var
+ store volatile float %val34, float addrspace(1)* @var
+ store volatile float %val35, float addrspace(1)* @var
+ store volatile float %val36, float addrspace(1)* @var
+ store volatile float %val37, float addrspace(1)* @var
+ store volatile float %val38, float addrspace(1)* @var
+ store volatile float %val39, float addrspace(1)* @var
+ store volatile float %val40, float addrspace(1)* @var
+
+ ret void
+}
+attributes #3 = {"amdgpu-flat-work-group-size"="1024,2048"}
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
new file mode 100644
index 00000000000..404b125723c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}max_18_sgprs:
+; CHECK: SGPRBlocks: 1
+; CHECK: NumSGPRsForWavesPerEU: 13
+define void @max_18_sgprs(i32 addrspace(1)* %out1,
+ i32 addrspace(1)* %out2,
+ i32 addrspace(1)* %out3,
+ i32 addrspace(1)* %out4,
+ i32 %one, i32 %two, i32 %three, i32 %four) #0 {
+ store i32 %one, i32 addrspace(1)* %out1
+ store i32 %two, i32 addrspace(1)* %out2
+ store i32 %three, i32 addrspace(1)* %out3
+ store i32 %four, i32 addrspace(1)* %out4
+ ret void
+}
+attributes #0 = {"amdgpu-num-sgpr"="18"}
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-vgpr.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-vgpr.ll
new file mode 100644
index 00000000000..97feb7276b7
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-vgpr.ll
@@ -0,0 +1,75 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
+
+@var = addrspace(1) global float 0.0
+
+; CHECK-LABEL: {{^}}max_20_vgprs:
+; CHECK: VGPRBlocks: 4
+; CHECK: NumVGPRsForWavesPerEU: 20
+define void @max_20_vgprs() #1 {
+ %val0 = load volatile float, float addrspace(1)* @var
+ %val1 = load volatile float, float addrspace(1)* @var
+ %val2 = load volatile float, float addrspace(1)* @var
+ %val3 = load volatile float, float addrspace(1)* @var
+ %val4 = load volatile float, float addrspace(1)* @var
+ %val5 = load volatile float, float addrspace(1)* @var
+ %val6 = load volatile float, float addrspace(1)* @var
+ %val7 = load volatile float, float addrspace(1)* @var
+ %val8 = load volatile float, float addrspace(1)* @var
+ %val9 = load volatile float, float addrspace(1)* @var
+ %val10 = load volatile float, float addrspace(1)* @var
+ %val11 = load volatile float, float addrspace(1)* @var
+ %val12 = load volatile float, float addrspace(1)* @var
+ %val13 = load volatile float, float addrspace(1)* @var
+ %val14 = load volatile float, float addrspace(1)* @var
+ %val15 = load volatile float, float addrspace(1)* @var
+ %val16 = load volatile float, float addrspace(1)* @var
+ %val17 = load volatile float, float addrspace(1)* @var
+ %val18 = load volatile float, float addrspace(1)* @var
+ %val19 = load volatile float, float addrspace(1)* @var
+ %val20 = load volatile float, float addrspace(1)* @var
+ %val21 = load volatile float, float addrspace(1)* @var
+ %val22 = load volatile float, float addrspace(1)* @var
+ %val23 = load volatile float, float addrspace(1)* @var
+ %val24 = load volatile float, float addrspace(1)* @var
+ %val25 = load volatile float, float addrspace(1)* @var
+ %val26 = load volatile float, float addrspace(1)* @var
+ %val27 = load volatile float, float addrspace(1)* @var
+ %val28 = load volatile float, float addrspace(1)* @var
+ %val29 = load volatile float, float addrspace(1)* @var
+ %val30 = load volatile float, float addrspace(1)* @var
+
+ store volatile float %val0, float addrspace(1)* @var
+ store volatile float %val1, float addrspace(1)* @var
+ store volatile float %val2, float addrspace(1)* @var
+ store volatile float %val3, float addrspace(1)* @var
+ store volatile float %val4, float addrspace(1)* @var
+ store volatile float %val5, float addrspace(1)* @var
+ store volatile float %val6, float addrspace(1)* @var
+ store volatile float %val7, float addrspace(1)* @var
+ store volatile float %val8, float addrspace(1)* @var
+ store volatile float %val9, float addrspace(1)* @var
+ store volatile float %val10, float addrspace(1)* @var
+ store volatile float %val11, float addrspace(1)* @var
+ store volatile float %val12, float addrspace(1)* @var
+ store volatile float %val13, float addrspace(1)* @var
+ store volatile float %val14, float addrspace(1)* @var
+ store volatile float %val15, float addrspace(1)* @var
+ store volatile float %val16, float addrspace(1)* @var
+ store volatile float %val17, float addrspace(1)* @var
+ store volatile float %val18, float addrspace(1)* @var
+ store volatile float %val19, float addrspace(1)* @var
+ store volatile float %val20, float addrspace(1)* @var
+ store volatile float %val21, float addrspace(1)* @var
+ store volatile float %val22, float addrspace(1)* @var
+ store volatile float %val23, float addrspace(1)* @var
+ store volatile float %val24, float addrspace(1)* @var
+ store volatile float %val25, float addrspace(1)* @var
+ store volatile float %val26, float addrspace(1)* @var
+ store volatile float %val27, float addrspace(1)* @var
+ store volatile float %val28, float addrspace(1)* @var
+ store volatile float %val29, float addrspace(1)* @var
+ store volatile float %val30, float addrspace(1)* @var
+
+ ret void
+}
+attributes #1 = {"amdgpu-num-vgpr"="20"}
diff --git a/llvm/test/CodeGen/AMDGPU/attr-unparseable.ll b/llvm/test/CodeGen/AMDGPU/attr-unparseable.ll
new file mode 100644
index 00000000000..0282bc34c0e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attr-unparseable.ll
@@ -0,0 +1,57 @@
+; RUN: not llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s 2>&1 | FileCheck %s
+
+; CHECK: can't parse integer attribute amdgpu-num-sgpr
+define void @unparseable_single_0() #0 {
+entry:
+ ret void
+}
+attributes #0 = {"amdgpu-num-sgpr"}
+
+; CHECK: can't parse integer attribute amdgpu-num-sgpr
+define void @unparseable_single_1() #1 {
+entry:
+ ret void
+}
+attributes #1 = {"amdgpu-num-sgpr"="k"}
+
+; CHECK: can't parse integer attribute amdgpu-num-sgpr
+define void @unparseable_single_2() #2 {
+entry:
+ ret void
+}
+attributes #2 = {"amdgpu-num-sgpr"="1,2"}
+
+; CHECK: can't parse first integer attribute amdgpu-flat-work-group-size
+define void @unparseable_pair_0() #3 {
+entry:
+ ret void
+}
+attributes #3 = {"amdgpu-flat-work-group-size"}
+
+; CHECK: can't parse first integer attribute amdgpu-flat-work-group-size
+define void @unparseable_pair_1() #4 {
+entry:
+ ret void
+}
+attributes #4 = {"amdgpu-flat-work-group-size"="k"}
+
+; CHECK: can't parse second integer attribute amdgpu-flat-work-group-size
+define void @unparseable_pair_2() #5 {
+entry:
+ ret void
+}
+attributes #5 = {"amdgpu-flat-work-group-size"="1"}
+
+; CHECK: can't parse second integer attribute amdgpu-flat-work-group-size
+define void @unparseable_pair_3() #6 {
+entry:
+ ret void
+}
+attributes #6 = {"amdgpu-flat-work-group-size"="1,k"}
+
+; CHECK: can't parse second integer attribute amdgpu-flat-work-group-size
+define void @unparseable_pair_4() #7 {
+entry:
+ ret void
+}
+attributes #7 = {"amdgpu-flat-work-group-size"="1,2,3"}
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll b/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll
index 1f851f9de53..4c9ef2e61f8 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll
@@ -121,4 +121,4 @@ define void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <
}
attributes #0 = { convergent nounwind }
-attributes #1 = { nounwind "amdgpu-max-waves-per-eu"="2" "amdgpu-max-work-group-size"="64" }
+attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,2" "amdgpu-flat-work-group-size"="64,64" }
diff --git a/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll b/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
index 79cdaf555aa..906a688febd 100644
--- a/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/large-work-group-promote-alloca.ll
@@ -255,10 +255,10 @@ entry:
}
attributes #0 = { nounwind "amdgpu-max-work-group-size"="63" }
-attributes #1 = { nounwind "amdgpu-max-waves-per-eu"="3" "amdgpu-max-work-group-size"="256" }
-attributes #2 = { nounwind "amdgpu-max-waves-per-eu"="1" "amdgpu-max-work-group-size"="1600" }
-attributes #3 = { nounwind "amdgpu-max-waves-per-eu"="0" }
-attributes #4 = { nounwind "amdgpu-max-waves-per-eu"="-1" }
-attributes #5 = { nounwind "amdgpu-max-waves-per-eu"="6" "amdgpu-max-work-group-size"="64" }
-attributes #6 = { nounwind "amdgpu-max-waves-per-eu"="8" "amdgpu-max-work-group-size"="64" }
-attributes #7 = { nounwind "amdgpu-max-waves-per-eu"="9" "amdgpu-max-work-group-size"="64" }
+attributes #1 = { nounwind "amdgpu-waves-per-eu"="1,3" "amdgpu-flat-work-group-size"="256,256" }
+attributes #2 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1600,1600" }
+attributes #3 = { nounwind "amdgpu-waves-per-eu"="1,10" }
+attributes #4 = { nounwind "amdgpu-waves-per-eu"="1,10" }
+attributes #5 = { nounwind "amdgpu-waves-per-eu"="1,6" "amdgpu-flat-work-group-size"="64,64" }
+attributes #6 = { nounwind "amdgpu-waves-per-eu"="1,8" "amdgpu-flat-work-group-size"="64,64" }
+attributes #7 = { nounwind "amdgpu-waves-per-eu"="1,9" "amdgpu-flat-work-group-size"="64,64" }
diff --git a/llvm/test/CodeGen/AMDGPU/large-work-group-registers.ll b/llvm/test/CodeGen/AMDGPU/large-work-group-registers.ll
deleted file mode 100644
index 4463c193494..00000000000
--- a/llvm/test/CodeGen/AMDGPU/large-work-group-registers.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=tonga -regalloc=basic -post-RA-scheduler=0 < %s | FileCheck %s
-
-; CHECK: NumVgprs: 64
-define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <8 x i32>] addrspace(2)* byval, [16 x <8 x i32>] addrspace(2)* byval, [16 x <4 x i32>] addrspace(2)* byval, <3 x i32> inreg, <3 x i32> inreg, <3 x i32>) #0 {
-main_body:
- %8 = getelementptr [16 x <4 x i32>], [16 x <4 x i32>] addrspace(2)* %4, i64 0, i64 8
- %9 = load <4 x i32>, <4 x i32> addrspace(2)* %8, align 16, !tbaa !0
- %10 = extractelement <3 x i32> %7, i32 0
- %11 = extractelement <3 x i32> %7, i32 1
- %12 = mul i32 %10, %11
- %bc = bitcast <3 x i32> %7 to <3 x float>
- %13 = extractelement <3 x float> %bc, i32 1
- %14 = insertelement <512 x float> undef, float %13, i32 %12
- call void @llvm.amdgcn.s.barrier()
- %15 = extractelement <3 x i32> %6, i32 0
- %16 = extractelement <3 x i32> %7, i32 0
- %17 = shl i32 %15, 5
- %18 = add i32 %17, %16
- %19 = shl i32 %18, 4
- %20 = extractelement <3 x i32> %7, i32 1
- %21 = shl i32 %20, 2
- %22 = sext i32 %21 to i64
- %23 = getelementptr i8, i8 addrspace(3)* null, i64 %22
- %24 = bitcast i8 addrspace(3)* %23 to i32 addrspace(3)*
- %25 = load i32, i32 addrspace(3)* %24, align 4
- %26 = extractelement <512 x float> %14, i32 %25
- %27 = insertelement <4 x float> undef, float %26, i32 0
- call void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float> %27, <4 x i32> %9, i32 0, i32 %19, i1 false, i1 false)
- ret void
-}
-
-declare void @llvm.amdgcn.s.barrier() #1
-
-declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #2
-
-attributes #0 = { "amdgpu-max-work-group-size"="1024" }
-attributes #1 = { convergent nounwind }
-attributes #2 = { nounwind }
-
-!0 = !{!1, !1, i64 0, i32 1}
-!1 = !{!"const", null}
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
index 31ff7d931f1..7d557a20d5a 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -343,8 +343,8 @@ define void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <
; FUNC-LABEL: {{^}}constant_zextload_v32i16_to_v32i32:
; GCN-DAG: s_load_dwordx16
; GCN-DAG: s_mov_b32 [[K:s[0-9]+]], 0xffff{{$}}
-; GCN-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[K]]
; GCN-DAG: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
+; GCN-DAG: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, [[K]]
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, {{T[0-9]+\.[XYZW]}}, 16, #1
diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll b/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
index 883bdc1ce26..3e1796959aa 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
@@ -297,4 +297,4 @@ define void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
; OPT: !0 = !{i32 0, i32 2048}
-attributes #0 = { nounwind "amdgpu-max-waves-per-eu"="2" }
+attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,2" }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
index 01ecb638b03..9cea1a23ea9 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
@@ -61,5 +61,5 @@ define void @promote_with_objectsize(i32 addrspace(1)* %out) #0 {
ret void
}
-attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" "amdgpu-max-waves-per-eu"="3" }
+attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,3" }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll
index 7c5a5182bc8..8ba849e5f88 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-no-opts.ll
@@ -34,5 +34,5 @@ entry:
ret void
}
-attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" }
-attributes #1 = { nounwind optnone noinline "amdgpu-max-work-group-size"="64" }
+attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" }
+attributes #1 = { nounwind optnone noinline "amdgpu-flat-work-group-size"="64,64" }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll
index 46fe307a17f..468a789e4a6 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-padding-size-estimate.ll
@@ -127,4 +127,4 @@ entry:
ret void
}
-attributes #0 = { nounwind "amdgpu-max-work-group-size"="64" }
+attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" "amdgpu-waves-per-eu"="1,7" }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
index 857e547aa03..2e7527dbdbc 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-icmp.ll
@@ -61,4 +61,4 @@ define void @lds_promoted_alloca_icmp_unknown_ptr(i32 addrspace(1)* %out, i32 %a
declare i32* @get_unknown_pointer() #0
-attributes #0 = { nounwind "amdgpu-max-waves-per-eu"="1" }
+attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
index a0ad564a6c8..0462a351c39 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-phi.ll
@@ -201,4 +201,4 @@ for.body: ; preds = %for.body, %for.body
declare i32* @get_unknown_pointer() #0
-attributes #0 = { nounwind "amdgpu-max-waves-per-eu"="1" }
+attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" }
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
index bb13adb1985..34d274df738 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-to-lds-select.ll
@@ -129,5 +129,5 @@ bb:
ret void
}
-attributes #0 = { norecurse nounwind "amdgpu-max-waves-per-eu"="1" }
+attributes #0 = { norecurse nounwind "amdgpu-waves-per-eu"="1,1" }
attributes #1 = { norecurse nounwind }
\ No newline at end of file
diff --git a/llvm/test/CodeGen/AMDGPU/target-cpu.ll b/llvm/test/CodeGen/AMDGPU/target-cpu.ll
index c1662acbf2a..cf80ff3f4c8 100644
--- a/llvm/test/CodeGen/AMDGPU/target-cpu.ll
+++ b/llvm/test/CodeGen/AMDGPU/target-cpu.ll
@@ -108,5 +108,5 @@ attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "target-cpu"="tahiti" }
attributes #3 = { nounwind "target-cpu"="bonaire" }
attributes #4 = { nounwind "target-cpu"="fiji" }
-attributes #5 = { nounwind "target-features"="+promote-alloca" "amdgpu-max-waves-per-eu"="3" }
-attributes #6 = { nounwind "target-features"="-promote-alloca" "amdgpu-max-waves-per-eu"="3" }
+attributes #5 = { nounwind "target-features"="+promote-alloca" "amdgpu-waves-per-eu"="1,3" }
+attributes #6 = { nounwind "target-features"="-promote-alloca" "amdgpu-waves-per-eu"="1,3" }
OpenPOWER on IntegriCloud