summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2019-06-17 16:48:56 +0000
committerStanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>2019-06-17 16:48:56 +0000
commit5d00c3060e11b1b8725c0af110f011c4d110d39a (patch)
treed962c6e2237356210cecef017162f24512917c71 /llvm/test
parent8b1c53b52818aac8cbf5e9acb168e6315e5cbc6b (diff)
downloadbcm5719-llvm-5d00c3060e11b1b8725c0af110f011c4d110d39a.tar.gz
bcm5719-llvm-5d00c3060e11b1b8725c0af110f011c4d110d39a.zip
[AMDGPU] gfx1010 wave32 metadata
Differential Revision: https://reviews.llvm.org/D63207 llvm-svn: 363577
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll14
-rw-r--r--llvm/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll14
-rw-r--r--llvm/test/MC/AMDGPU/hsa-diag-v3.s41
-rw-r--r--llvm/test/MC/AMDGPU/hsa-gfx10-v3.s223
-rw-r--r--llvm/test/MC/AMDGPU/hsa-gfx10.s284
-rw-r--r--llvm/test/MC/AMDGPU/hsa-wave-size.s65
-rw-r--r--llvm/test/MC/AMDGPU/hsa.s4
-rw-r--r--llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s2
8 files changed, 640 insertions, 7 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
index d30fb29f10a..ae1d7135436 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
@@ -1,6 +1,7 @@
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=WAVE64 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=WAVE64 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=WAVE64 --check-prefix=NOTES %s
+; run: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX1010 --check-prefix=WAVE32 --check-prefix=NOTES %s
@var = addrspace(1) global float 0.0
@@ -14,10 +15,12 @@
; CHECK: .max_flat_workgroup_size: 256
; CHECK: .name: test
; CHECK: .private_segment_fixed_size: 0
-; CHECK: .sgpr_count: 8
+; WAVE64: .sgpr_count: 8
+; WAVE32: .sgpr_count: 10
; CHECK: .symbol: test.kd
; CHECK: .vgpr_count: 6
-; CHECK: .wavefront_size: 64
+; WAVE64: .wavefront_size: 64
+; WAVE32: .wavefront_size: 32
define amdgpu_kernel void @test(
half addrspace(1)* %r,
half addrspace(1)* %a,
@@ -34,6 +37,7 @@ entry:
; GFX700: .sgpr_spill_count: 40
; GFX803: .sgpr_spill_count: 24
; GFX900: .sgpr_spill_count: 24
+; GFX1010: .sgpr_spill_count: 24
; CHECK: .symbol: num_spilled_sgprs.kd
define amdgpu_kernel void @num_spilled_sgprs(
i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, [8 x i32],
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll
new file mode 100644
index 00000000000..a30184d6814
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-32 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-64 %s
+
+; GCN: ---
+; GCN: Kernels:
+; GCN: - Name: wavefrontsize
+; GCN: CodeProps:
+; GFX10-32: WavefrontSize: 32
+; GFX10-64: WavefrontSize: 64
+; GCN: ...
+define amdgpu_kernel void @wavefrontsize() {
+entry:
+ ret void
+}
diff --git a/llvm/test/MC/AMDGPU/hsa-diag-v3.s b/llvm/test/MC/AMDGPU/hsa-diag-v3.s
index 5f2d89da75e..8a793ca49b1 100644
--- a/llvm/test/MC/AMDGPU/hsa-diag-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-diag-v3.s
@@ -1,4 +1,5 @@
// RUN: not llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s
+// RUN: not llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefix=GFX10
// RUN: not llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd- -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefix=NOT-AMDHSA
.text
@@ -44,6 +45,46 @@
// CHECK: error: expected .amdhsa_ directive or .end_amdhsa_kernel
.end_amdhsa_kernel
+.amdhsa_kernel foo
+ .amdhsa_wavefront_size32 1
+ // CHECK: error: directive requires gfx10+
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+ .amdhsa_workgroup_processor_mode 1
+ // CHECK: error: directive requires gfx10+
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+ .amdhsa_memory_ordered 1
+ // CHECK: error: directive requires gfx10+
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+ .amdhsa_forward_progress 1
+ // CHECK: error: directive requires gfx10+
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+ .amdhsa_wavefront_size32 5
+ // GFX10: error: value out of range
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+ .amdhsa_workgroup_processor_mode 5
+ // GFX10: error: value out of range
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+ .amdhsa_memory_ordered 5
+ // GFX10: error: value out of range
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+ .amdhsa_forward_progress 5
+ // GFX10: error: value out of range
+.end_amdhsa_kernel
+
.set .amdgcn.next_free_vgpr, "foo"
v_mov_b32_e32 v0, s0
// CHECK: error: .amdgcn.next_free_{v,s}gpr symbols must be absolute expressions
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
new file mode 100644
index 00000000000..ef078738856
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
@@ -0,0 +1,223 @@
+// RUN: llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -filetype=obj < %s > %t
+// RUN: llvm-readobj -elf-output-style=GNU -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// big endian not supported
+// XFAIL: powerpc-, powerpc64-, s390x, mips-, mips64-, sparc
+
+// READOBJ: Section Headers
+// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
+// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 0000c0 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
+
+// READOBJ: Relocation section '.rela.rodata' at offset
+// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
+// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
+// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
+
+// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
+// READOBJ: {{[0-9]+}}: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
+// READOBJ: {{[0-9]+}}: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
+// READOBJ: {{[0-9]+}}: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal
+// READOBJ: {{[0-9]+}}: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
+// READOBJ: {{[0-9]+}}: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr
+// READOBJ: {{[0-9]+}}: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd
+
+// OBJDUMP: Contents of section .rodata
+// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
+// minimal
+// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000
+// complete
+// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000
+// special_sgpr
+// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000
+
+.text
+// ASM: .text
+
+.amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
+// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
+
+.p2align 8
+.type minimal,@function
+minimal:
+ s_endpgm
+
+.p2align 8
+.type complete,@function
+complete:
+ s_endpgm
+
+.p2align 8
+.type special_sgpr,@function
+special_sgpr:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+// Test that only specifying required directives is allowed, and that defaulted
+// values are omitted.
+.p2align 6
+.amdhsa_kernel minimal
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel minimal
+// ASM: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM: .end_amdhsa_kernel
+
+// Test that we can specify all available directives with non-default values.
+.p2align 6
+.amdhsa_kernel complete
+ .amdhsa_group_segment_fixed_size 1
+ .amdhsa_private_segment_fixed_size 1
+ .amdhsa_user_sgpr_private_segment_buffer 1
+ .amdhsa_user_sgpr_dispatch_ptr 1
+ .amdhsa_user_sgpr_queue_ptr 1
+ .amdhsa_user_sgpr_kernarg_segment_ptr 1
+ .amdhsa_user_sgpr_dispatch_id 1
+ .amdhsa_user_sgpr_flat_scratch_init 1
+ .amdhsa_user_sgpr_private_segment_size 1
+ .amdhsa_wavefront_size32 1
+ .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+ .amdhsa_system_sgpr_workgroup_id_x 0
+ .amdhsa_system_sgpr_workgroup_id_y 1
+ .amdhsa_system_sgpr_workgroup_id_z 1
+ .amdhsa_system_sgpr_workgroup_info 1
+ .amdhsa_system_vgpr_workitem_id 1
+ .amdhsa_next_free_vgpr 9
+ .amdhsa_next_free_sgpr 27
+ .amdhsa_reserve_vcc 0
+ .amdhsa_reserve_flat_scratch 0
+ .amdhsa_reserve_xnack_mask 0
+ .amdhsa_float_round_mode_32 1
+ .amdhsa_float_round_mode_16_64 1
+ .amdhsa_float_denorm_mode_32 1
+ .amdhsa_float_denorm_mode_16_64 0
+ .amdhsa_dx10_clamp 0
+ .amdhsa_ieee_mode 0
+ .amdhsa_fp16_overflow 1
+ .amdhsa_workgroup_processor_mode 1
+ .amdhsa_memory_ordered 1
+ .amdhsa_forward_progress 1
+ .amdhsa_exception_fp_ieee_invalid_op 1
+ .amdhsa_exception_fp_denorm_src 1
+ .amdhsa_exception_fp_ieee_div_zero 1
+ .amdhsa_exception_fp_ieee_overflow 1
+ .amdhsa_exception_fp_ieee_underflow 1
+ .amdhsa_exception_fp_ieee_inexact 1
+ .amdhsa_exception_int_div_zero 1
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel complete
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
+// ASM-NEXT: .amdhsa_wavefront_size32 1
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
+// ASM-NEXT: .amdhsa_next_free_vgpr 9
+// ASM-NEXT: .amdhsa_next_free_sgpr 27
+// ASM-NEXT: .amdhsa_reserve_vcc 0
+// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
+// ASM-NEXT: .amdhsa_float_round_mode_32 1
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
+// ASM-NEXT: .amdhsa_dx10_clamp 0
+// ASM-NEXT: .amdhsa_ieee_mode 0
+// ASM-NEXT: .amdhsa_fp16_overflow 1
+// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
+// ASM-NEXT: .amdhsa_memory_ordered 1
+// ASM-NEXT: .amdhsa_forward_progress 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
+// ASM-NEXT: .amdhsa_exception_int_div_zero 1
+// ASM-NEXT: .end_amdhsa_kernel
+
+// Test that we are including special SGPR usage in the granulated count.
+.p2align 6
+.amdhsa_kernel special_sgpr
+ // Same next_free_sgpr as "complete", but...
+ .amdhsa_next_free_sgpr 27
+ // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from
+ // 3 granules to 4
+ .amdhsa_reserve_flat_scratch 1
+
+ .amdhsa_reserve_vcc 0
+ .amdhsa_reserve_xnack_mask 0
+
+ .amdhsa_float_denorm_mode_16_64 0
+ .amdhsa_dx10_clamp 0
+ .amdhsa_ieee_mode 0
+ .amdhsa_next_free_vgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel special_sgpr
+// ASM: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 27
+// ASM-NEXT: .amdhsa_reserve_vcc 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
+// ASM: .amdhsa_float_denorm_mode_16_64 0
+// ASM-NEXT: .amdhsa_dx10_clamp 0
+// ASM-NEXT: .amdhsa_ieee_mode 0
+// ASM: .end_amdhsa_kernel
+
+.section .foo
+
+.byte .amdgcn.gfx_generation_number
+// ASM: .byte 10
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 0
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 0
+
+v_mov_b32_e32 v7, s10
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 8
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 11
+
+.set .amdgcn.next_free_vgpr, 0
+.set .amdgcn.next_free_sgpr, 0
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 0
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 0
+
+v_mov_b32_e32 v16, s3
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 17
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 4
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10.s b/llvm/test/MC/AMDGPU/hsa-gfx10.s
new file mode 100644
index 00000000000..db868f300f0
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-gfx10.s
@@ -0,0 +1,284 @@
+// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64,-code-object-v3 -show-encoding %s | FileCheck %s --check-prefix=ASM
+// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64,-code-object-v3 -show-encoding %s | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF
+
+// ELF: Section {
+// ELF: Name: .text
+// ELF: Type: SHT_PROGBITS (0x1)
+// ELF: Flags [ (0x6)
+// ELF: SHF_ALLOC (0x2)
+// ELF: SHF_EXECINSTR (0x4)
+
+// ELF: SHT_NOTE
+// ELF: 0000: 04000000 08000000 01000000 414D4400
+// ELF: 0010: 02000000 00000000 04000000 1B000000
+// ELF: 0020: 03000000 414D4400 04000700 07000000
+// ELF: 0030: 00000000 00000000 414D4400 414D4447
+// ELF: 0040: 50550000
+// We can't check binary representation of metadata note: it is different on
+// Windows and Linux because of carriage return on Windows
+
+// ELF: Symbol {
+// ELF: Name: amd_kernel_code_t_minimal
+// ELF: Type: AMDGPU_HSA_KERNEL (0xA)
+// ELF: Section: .text
+// ELF: }
+// ELF: Symbol {
+// ELF: Name: amd_kernel_code_t_test_all
+// ELF: Type: AMDGPU_HSA_KERNEL (0xA)
+// ELF: Section: .text
+// ELF: }
+
+.text
+// ASM: .text
+
+.hsa_code_object_version 2,0
+// ASM: .hsa_code_object_version 2,0
+
+.hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
+// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
+
+.amd_amdgpu_hsa_metadata
+ Version: [ 3, 0 ]
+ Kernels:
+ - Name: amd_kernel_code_t_test_all
+ SymbolName: amd_kernel_code_t_test_all@kd
+ - Name: amd_kernel_code_t_minimal
+ SymbolName: amd_kernel_code_t_minimal@kd
+.end_amd_amdgpu_hsa_metadata
+
+// ASM: .amd_amdgpu_hsa_metadata
+// ASM: Version: [ 3, 0 ]
+// ASM: Kernels:
+// ASM: - Name: amd_kernel_code_t_test_all
+// ASM: SymbolName: 'amd_kernel_code_t_test_all@kd'
+// ASM: - Name: amd_kernel_code_t_minimal
+// ASM: SymbolName: 'amd_kernel_code_t_minimal@kd'
+// ASM: .end_amd_amdgpu_hsa_metadata
+
+.amdgpu_hsa_kernel amd_kernel_code_t_test_all
+.amdgpu_hsa_kernel amd_kernel_code_t_minimal
+
+amd_kernel_code_t_test_all:
+; Test all amd_kernel_code_t members with non-default values.
+.amd_kernel_code_t
+ kernel_code_version_major = 100
+ kernel_code_version_minor = 100
+ machine_kind = 0
+ machine_version_major = 5
+ machine_version_minor = 5
+ machine_version_stepping = 5
+ kernel_code_entry_byte_offset = 512
+ kernel_code_prefetch_byte_size = 1
+ max_scratch_backing_memory_byte_size = 1
+ compute_pgm_rsrc1_vgprs = 1
+ compute_pgm_rsrc1_sgprs = 1
+ compute_pgm_rsrc1_priority = 1
+ compute_pgm_rsrc1_float_mode = 1
+ compute_pgm_rsrc1_priv = 1
+ compute_pgm_rsrc1_dx10_clamp = 1
+ compute_pgm_rsrc1_debug_mode = 1
+ compute_pgm_rsrc1_ieee_mode = 1
+ compute_pgm_rsrc1_wgp_mode = 0
+ compute_pgm_rsrc1_mem_ordered = 0
+ compute_pgm_rsrc1_fwd_progress = 1
+ compute_pgm_rsrc2_scratch_en = 1
+ compute_pgm_rsrc2_user_sgpr = 1
+ compute_pgm_rsrc2_tgid_x_en = 1
+ compute_pgm_rsrc2_tgid_y_en = 1
+ compute_pgm_rsrc2_tgid_z_en = 1
+ compute_pgm_rsrc2_tg_size_en = 1
+ compute_pgm_rsrc2_tidig_comp_cnt = 1
+ compute_pgm_rsrc2_excp_en_msb = 1
+ compute_pgm_rsrc2_lds_size = 1
+ compute_pgm_rsrc2_excp_en = 1
+ enable_sgpr_private_segment_buffer = 1
+ enable_sgpr_dispatch_ptr = 1
+ enable_sgpr_queue_ptr = 1
+ enable_sgpr_kernarg_segment_ptr = 1
+ enable_sgpr_dispatch_id = 1
+ enable_sgpr_flat_scratch_init = 1
+ enable_sgpr_private_segment_size = 1
+ enable_sgpr_grid_workgroup_count_x = 1
+ enable_sgpr_grid_workgroup_count_y = 1
+ enable_sgpr_grid_workgroup_count_z = 1
+ enable_ordered_append_gds = 1
+ private_element_size = 1
+ is_ptr64 = 1
+ is_dynamic_callstack = 1
+ is_debug_enabled = 1
+ is_xnack_enabled = 1
+ workitem_private_segment_byte_size = 1
+ workgroup_group_segment_byte_size = 1
+ gds_segment_byte_size = 1
+ kernarg_segment_byte_size = 1
+ workgroup_fbarrier_count = 1
+ wavefront_sgpr_count = 1
+ workitem_vgpr_count = 1
+ reserved_vgpr_first = 1
+ reserved_vgpr_count = 1
+ reserved_sgpr_first = 1
+ reserved_sgpr_count = 1
+ debug_wavefront_private_segment_offset_sgpr = 1
+ debug_private_segment_buffer_sgpr = 1
+ kernarg_segment_alignment = 5
+ group_segment_alignment = 5
+ private_segment_alignment = 5
+ wavefront_size = 6
+ call_convention = 1
+ runtime_loader_kernel_symbol = 1
+.end_amd_kernel_code_t
+
+// ASM-LABEL: {{^}}amd_kernel_code_t_test_all:
+// ASM: .amd_kernel_code_t
+// ASM: amd_code_version_major = 100
+// ASM: amd_code_version_minor = 100
+// ASM: amd_machine_kind = 0
+// ASM: amd_machine_version_major = 5
+// ASM: amd_machine_version_minor = 5
+// ASM: amd_machine_version_stepping = 5
+// ASM: kernel_code_entry_byte_offset = 512
+// ASM: kernel_code_prefetch_byte_size = 1
+// ASM: granulated_workitem_vgpr_count = 1
+// ASM: granulated_wavefront_sgpr_count = 1
+// ASM: priority = 1
+// ASM: float_mode = 1
+// ASM: priv = 1
+// ASM: enable_dx10_clamp = 1
+// ASM: debug_mode = 1
+// ASM: enable_ieee_mode = 1
+// ASM: enable_wgp_mode = 0
+// ASM: enable_mem_ordered = 0
+// ASM: enable_fwd_progress = 1
+// ASM: enable_sgpr_private_segment_wave_byte_offset = 1
+// ASM: user_sgpr_count = 1
+// ASM: enable_sgpr_workgroup_id_x = 1
+// ASM: enable_sgpr_workgroup_id_y = 1
+// ASM: enable_sgpr_workgroup_id_z = 1
+// ASM: enable_sgpr_workgroup_info = 1
+// ASM: enable_vgpr_workitem_id = 1
+// ASM: enable_exception_msb = 1
+// ASM: granulated_lds_size = 1
+// ASM: enable_exception = 1
+// ASM: enable_sgpr_private_segment_buffer = 1
+// ASM: enable_sgpr_dispatch_ptr = 1
+// ASM: enable_sgpr_queue_ptr = 1
+// ASM: enable_sgpr_kernarg_segment_ptr = 1
+// ASM: enable_sgpr_dispatch_id = 1
+// ASM: enable_sgpr_flat_scratch_init = 1
+// ASM: enable_sgpr_private_segment_size = 1
+// ASM: enable_sgpr_grid_workgroup_count_x = 1
+// ASM: enable_sgpr_grid_workgroup_count_y = 1
+// ASM: enable_sgpr_grid_workgroup_count_z = 1
+// ASM: enable_ordered_append_gds = 1
+// ASM: private_element_size = 1
+// ASM: is_ptr64 = 1
+// ASM: is_dynamic_callstack = 1
+// ASM: is_debug_enabled = 1
+// ASM: is_xnack_enabled = 1
+// ASM: workitem_private_segment_byte_size = 1
+// ASM: workgroup_group_segment_byte_size = 1
+// ASM: gds_segment_byte_size = 1
+// ASM: kernarg_segment_byte_size = 1
+// ASM: workgroup_fbarrier_count = 1
+// ASM: wavefront_sgpr_count = 1
+// ASM: workitem_vgpr_count = 1
+// ASM: reserved_vgpr_first = 1
+// ASM: reserved_vgpr_count = 1
+// ASM: reserved_sgpr_first = 1
+// ASM: reserved_sgpr_count = 1
+// ASM: debug_wavefront_private_segment_offset_sgpr = 1
+// ASM: debug_private_segment_buffer_sgpr = 1
+// ASM: kernarg_segment_alignment = 5
+// ASM: group_segment_alignment = 5
+// ASM: private_segment_alignment = 5
+// ASM: wavefront_size = 6
+// ASM: call_convention = 1
+// ASM: runtime_loader_kernel_symbol = 1
+// ASM: .end_amd_kernel_code_t
+
+amd_kernel_code_t_minimal:
+.amd_kernel_code_t
+ enable_sgpr_kernarg_segment_ptr = 1
+ is_ptr64 = 1
+ granulated_workitem_vgpr_count = 1
+ granulated_wavefront_sgpr_count = 1
+ user_sgpr_count = 2
+ kernarg_segment_byte_size = 16
+ wavefront_sgpr_count = 8
+// wavefront_sgpr_count = 7
+; wavefront_sgpr_count = 7
+// Make sure a blank line won't break anything:
+
+// Make sure a line with whitespace won't break anything:
+
+ workitem_vgpr_count = 16
+.end_amd_kernel_code_t
+
+// ASM-LABEL: {{^}}amd_kernel_code_t_minimal:
+// ASM: .amd_kernel_code_t
+// ASM: amd_code_version_major = 1
+// ASM: amd_code_version_minor = 2
+// ASM: amd_machine_kind = 1
+// ASM: amd_machine_version_major = 10
+// ASM: amd_machine_version_minor = 1
+// ASM: amd_machine_version_stepping = 0
+// ASM: kernel_code_entry_byte_offset = 256
+// ASM: kernel_code_prefetch_byte_size = 0
+// ASM: granulated_workitem_vgpr_count = 1
+// ASM: granulated_wavefront_sgpr_count = 1
+// ASM: priority = 0
+// ASM: float_mode = 0
+// ASM: priv = 0
+// ASM: enable_dx10_clamp = 0
+// ASM: debug_mode = 0
+// ASM: enable_ieee_mode = 0
+// ASM: enable_wgp_mode = 1
+// ASM: enable_mem_ordered = 1
+// ASM: enable_fwd_progress = 0
+// ASM: enable_sgpr_private_segment_wave_byte_offset = 0
+// ASM: user_sgpr_count = 2
+// ASM: enable_sgpr_workgroup_id_x = 0
+// ASM: enable_sgpr_workgroup_id_y = 0
+// ASM: enable_sgpr_workgroup_id_z = 0
+// ASM: enable_sgpr_workgroup_info = 0
+// ASM: enable_vgpr_workitem_id = 0
+// ASM: enable_exception_msb = 0
+// ASM: granulated_lds_size = 0
+// ASM: enable_exception = 0
+// ASM: enable_sgpr_private_segment_buffer = 0
+// ASM: enable_sgpr_dispatch_ptr = 0
+// ASM: enable_sgpr_queue_ptr = 0
+// ASM: enable_sgpr_kernarg_segment_ptr = 1
+// ASM: enable_sgpr_dispatch_id = 0
+// ASM: enable_sgpr_flat_scratch_init = 0
+// ASM: enable_sgpr_private_segment_size = 0
+// ASM: enable_sgpr_grid_workgroup_count_x = 0
+// ASM: enable_sgpr_grid_workgroup_count_y = 0
+// ASM: enable_sgpr_grid_workgroup_count_z = 0
+// ASM: enable_wavefront_size32 = 0
+// ASM: enable_ordered_append_gds = 0
+// ASM: private_element_size = 0
+// ASM: is_ptr64 = 1
+// ASM: is_dynamic_callstack = 0
+// ASM: is_debug_enabled = 0
+// ASM: is_xnack_enabled = 0
+// ASM: workitem_private_segment_byte_size = 0
+// ASM: workgroup_group_segment_byte_size = 0
+// ASM: gds_segment_byte_size = 0
+// ASM: kernarg_segment_byte_size = 16
+// ASM: workgroup_fbarrier_count = 0
+// ASM: wavefront_sgpr_count = 8
+// ASM: workitem_vgpr_count = 16
+// ASM: reserved_vgpr_first = 0
+// ASM: reserved_vgpr_count = 0
+// ASM: reserved_sgpr_first = 0
+// ASM: reserved_sgpr_count = 0
+// ASM: debug_wavefront_private_segment_offset_sgpr = 0
+// ASM: debug_private_segment_buffer_sgpr = 0
+// ASM: kernarg_segment_alignment = 4
+// ASM: group_segment_alignment = 4
+// ASM: private_segment_alignment = 4
+// ASM: wavefront_size = 6
+// ASM: call_convention = -1
+// ASM: runtime_loader_kernel_symbol = 0
+// ASM: .end_amd_kernel_code_t
diff --git a/llvm/test/MC/AMDGPU/hsa-wave-size.s b/llvm/test/MC/AMDGPU/hsa-wave-size.s
new file mode 100644
index 00000000000..8785895ce18
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-wave-size.s
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=-code-object-v3 %s | FileCheck --check-prefixes=GCN,GFX7 %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,+WavefrontSize32,-WavefrontSize64 %s | FileCheck --check-prefixes=GCN,GFX10-W32 %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,-WavefrontSize32,+WavefrontSize64 %s | FileCheck --check-prefixes=GCN,GFX10-W64 %s
+
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=-code-object-v3 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX7-ERR %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX10-W32-ERR %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX10-W64-ERR %s
+
+// GCN: test0:
+// GFX7: enable_wavefront_size32 = 0
+// GFX7: wavefront_size = 6
+// GFX10-W32: enable_wavefront_size32 = 1
+// GFX10-W32: wavefront_size = 5
+// GFX10-W64: enable_wavefront_size32 = 0
+// GFX10-W64: wavefront_size = 6
+.amdgpu_hsa_kernel test0
+test0:
+.amd_kernel_code_t
+.end_amd_kernel_code_t
+
+// GCN: test1:
+// GFX7: enable_wavefront_size32 = 0
+// GFX7: wavefront_size = 6
+// GFX10-W32-ERR: error: enable_wavefront_size32=0 requires +WavefrontSize64
+// GFX10-W64: enable_wavefront_size32 = 0
+// GFX10-W64: wavefront_size = 6
+.amdgpu_hsa_kernel test1
+test1:
+.amd_kernel_code_t
+ enable_wavefront_size32 = 0
+.end_amd_kernel_code_t
+
+// GCN: test2:
+// GFX7: enable_wavefront_size32 = 0
+// GFX7: wavefront_size = 6
+// GFX10-W32-ERR: error: wavefront_size=6 requires +WavefrontSize64
+// GFX10-W64: enable_wavefront_size32 = 0
+// GFX10-W64: wavefront_size = 6
+.amdgpu_hsa_kernel test2
+test2:
+.amd_kernel_code_t
+ wavefront_size = 6
+.end_amd_kernel_code_t
+
+// GCN: test3:
+// GFX7-ERR: error: enable_wavefront_size32=1 is only allowed on GFX10+
+// GFX10-W32: enable_wavefront_size32 = 1
+// GFX10-W32: wavefront_size = 5
+// GFX10-W64-ERR: error: enable_wavefront_size32=1 requires +WavefrontSize32
+.amdgpu_hsa_kernel test3
+test3:
+.amd_kernel_code_t
+ enable_wavefront_size32 = 1
+.end_amd_kernel_code_t
+
+// GCN: test4:
+// GFX7-ERR: error: wavefront_size=5 is only allowed on GFX10+
+// GFX10-W32: enable_wavefront_size32 = 1
+// GFX10-W32: wavefront_size = 5
+// GFX10-W64-ERR: error: wavefront_size=5 requires +WavefrontSize32
+.amdgpu_hsa_kernel test4
+test4:
+.amd_kernel_code_t
+ wavefront_size = 5
+.end_amd_kernel_code_t
diff --git a/llvm/test/MC/AMDGPU/hsa.s b/llvm/test/MC/AMDGPU/hsa.s
index 9eda20da063..87c09648e51 100644
--- a/llvm/test/MC/AMDGPU/hsa.s
+++ b/llvm/test/MC/AMDGPU/hsa.s
@@ -120,7 +120,7 @@ amd_kernel_code_t_test_all:
kernarg_segment_alignment = 5
group_segment_alignment = 5
private_segment_alignment = 5
- wavefront_size = 5
+ wavefront_size = 6
call_convention = 1
runtime_loader_kernel_symbol = 1
.end_amd_kernel_code_t
@@ -185,7 +185,7 @@ amd_kernel_code_t_test_all:
// ASM: kernarg_segment_alignment = 5
// ASM: group_segment_alignment = 5
// ASM: private_segment_alignment = 5
-// ASM: wavefront_size = 5
+// ASM: wavefront_size = 6
// ASM: call_convention = 1
// ASM: runtime_loader_kernel_symbol = 1
// ASM: .end_amd_kernel_code_t
diff --git a/llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s b/llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s
index ddd76fcf918..0a5686e27e8 100644
--- a/llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s
+++ b/llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s
@@ -1,6 +1,8 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx801 -mattr=-code-object-v3,-fast-fmaf -show-encoding %s | FileCheck --check-prefix=GFX8 %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-code-object-v3,-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-code-object-v3,-WavefrontSize32 -show-encoding %s | FileCheck --check-prefix=GFX10 %s
.hsa_code_object_isa
// GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
// GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
+// GFX10: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU"
OpenPOWER on IntegriCloud