diff options
author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-06-17 16:48:56 +0000 |
---|---|---|
committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-06-17 16:48:56 +0000 |
commit | 5d00c3060e11b1b8725c0af110f011c4d110d39a (patch) | |
tree | d962c6e2237356210cecef017162f24512917c71 /llvm/test/MC/AMDGPU | |
parent | 8b1c53b52818aac8cbf5e9acb168e6315e5cbc6b (diff) | |
download | bcm5719-llvm-5d00c3060e11b1b8725c0af110f011c4d110d39a.tar.gz bcm5719-llvm-5d00c3060e11b1b8725c0af110f011c4d110d39a.zip |
[AMDGPU] gfx1010 wave32 metadata
Differential Revision: https://reviews.llvm.org/D63207
llvm-svn: 363577
Diffstat (limited to 'llvm/test/MC/AMDGPU')
-rw-r--r-- | llvm/test/MC/AMDGPU/hsa-diag-v3.s | 41 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/hsa-gfx10-v3.s | 223 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/hsa-gfx10.s | 284 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/hsa-wave-size.s | 65 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/hsa.s | 4 | ||||
-rw-r--r-- | llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s | 2 |
6 files changed, 617 insertions, 2 deletions
diff --git a/llvm/test/MC/AMDGPU/hsa-diag-v3.s b/llvm/test/MC/AMDGPU/hsa-diag-v3.s index 5f2d89da75e..8a793ca49b1 100644 --- a/llvm/test/MC/AMDGPU/hsa-diag-v3.s +++ b/llvm/test/MC/AMDGPU/hsa-diag-v3.s @@ -1,4 +1,5 @@ // RUN: not llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s +// RUN: not llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefix=GFX10 // RUN: not llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd- -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefix=NOT-AMDHSA .text @@ -44,6 +45,46 @@ // CHECK: error: expected .amdhsa_ directive or .end_amdhsa_kernel .end_amdhsa_kernel +.amdhsa_kernel foo + .amdhsa_wavefront_size32 1 + // CHECK: error: directive requires gfx10+ +.end_amdhsa_kernel + +.amdhsa_kernel foo + .amdhsa_workgroup_processor_mode 1 + // CHECK: error: directive requires gfx10+ +.end_amdhsa_kernel + +.amdhsa_kernel foo + .amdhsa_memory_ordered 1 + // CHECK: error: directive requires gfx10+ +.end_amdhsa_kernel + +.amdhsa_kernel foo + .amdhsa_forward_progress 1 + // CHECK: error: directive requires gfx10+ +.end_amdhsa_kernel + +.amdhsa_kernel foo + .amdhsa_wavefront_size32 5 + // GFX10: error: value out of range +.end_amdhsa_kernel + +.amdhsa_kernel foo + .amdhsa_workgroup_processor_mode 5 + // GFX10: error: value out of range +.end_amdhsa_kernel + +.amdhsa_kernel foo + .amdhsa_memory_ordered 5 + // GFX10: error: value out of range +.end_amdhsa_kernel + +.amdhsa_kernel foo + .amdhsa_forward_progress 5 + // GFX10: error: value out of range +.end_amdhsa_kernel + .set .amdgcn.next_free_vgpr, "foo" v_mov_b32_e32 v0, s0 // CHECK: error: .amdgcn.next_free_{v,s}gpr symbols must be absolute expressions diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s new file mode 100644 index 00000000000..ef078738856 --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s @@ -0,0 +1,223 @@ +// RUN: llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s +// RUN: llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -filetype=obj < %s > %t +// RUN: llvm-readobj -elf-output-style=GNU -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s +// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s + +// big endian not supported +// XFAIL: powerpc-, powerpc64-, s390x, mips-, mips64-, sparc + +// READOBJ: Section Headers +// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256 +// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 0000c0 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64 + +// READOBJ: Relocation section '.rela.rodata' at offset +// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10 +// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110 +// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210 + +// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries: +// READOBJ: {{[0-9]+}}: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete +// READOBJ: {{[0-9]+}}: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd +// READOBJ: {{[0-9]+}}: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal +// READOBJ: {{[0-9]+}}: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd +// READOBJ: {{[0-9]+}}: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr +// READOBJ: {{[0-9]+}}: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd + +// OBJDUMP: Contents of section .rodata +// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here. +// minimal +// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000 +// complete +// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000 +// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000 +// special_sgpr +// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000 +// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000 + +.text +// ASM: .text + +.amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack" +// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack" + +.p2align 8 +.type minimal,@function +minimal: + s_endpgm + +.p2align 8 +.type complete,@function +complete: + s_endpgm + +.p2align 8 +.type special_sgpr,@function +special_sgpr: + s_endpgm + +.rodata +// ASM: .rodata + +// Test that only specifying required directives is allowed, and that defaulted +// values are omitted. +.p2align 6 +.amdhsa_kernel minimal + .amdhsa_next_free_vgpr 0 + .amdhsa_next_free_sgpr 0 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel minimal +// ASM: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 0 +// ASM: .end_amdhsa_kernel + +// Test that we can specify all available directives with non-default values. +.p2align 6 +.amdhsa_kernel complete + .amdhsa_group_segment_fixed_size 1 + .amdhsa_private_segment_fixed_size 1 + .amdhsa_user_sgpr_private_segment_buffer 1 + .amdhsa_user_sgpr_dispatch_ptr 1 + .amdhsa_user_sgpr_queue_ptr 1 + .amdhsa_user_sgpr_kernarg_segment_ptr 1 + .amdhsa_user_sgpr_dispatch_id 1 + .amdhsa_user_sgpr_flat_scratch_init 1 + .amdhsa_user_sgpr_private_segment_size 1 + .amdhsa_wavefront_size32 1 + .amdhsa_system_sgpr_private_segment_wavefront_offset 1 + .amdhsa_system_sgpr_workgroup_id_x 0 + .amdhsa_system_sgpr_workgroup_id_y 1 + .amdhsa_system_sgpr_workgroup_id_z 1 + .amdhsa_system_sgpr_workgroup_info 1 + .amdhsa_system_vgpr_workitem_id 1 + .amdhsa_next_free_vgpr 9 + .amdhsa_next_free_sgpr 27 + .amdhsa_reserve_vcc 0 + .amdhsa_reserve_flat_scratch 0 + .amdhsa_reserve_xnack_mask 0 + .amdhsa_float_round_mode_32 1 + .amdhsa_float_round_mode_16_64 1 + .amdhsa_float_denorm_mode_32 1 + .amdhsa_float_denorm_mode_16_64 0 + .amdhsa_dx10_clamp 0 + .amdhsa_ieee_mode 0 + .amdhsa_fp16_overflow 1 + .amdhsa_workgroup_processor_mode 1 + .amdhsa_memory_ordered 1 + .amdhsa_forward_progress 1 + .amdhsa_exception_fp_ieee_invalid_op 1 + .amdhsa_exception_fp_denorm_src 1 + .amdhsa_exception_fp_ieee_div_zero 1 + .amdhsa_exception_fp_ieee_overflow 1 + .amdhsa_exception_fp_ieee_underflow 1 + .amdhsa_exception_fp_ieee_inexact 1 + .amdhsa_exception_int_div_zero 1 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel complete +// ASM-NEXT: .amdhsa_group_segment_fixed_size 1 +// ASM-NEXT: .amdhsa_private_segment_fixed_size 1 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1 +// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1 +// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1 +// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1 +// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1 +// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1 +// ASM-NEXT: .amdhsa_wavefront_size32 1 +// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1 +// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1 +// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1 +// ASM-NEXT: .amdhsa_next_free_vgpr 9 +// ASM-NEXT: .amdhsa_next_free_sgpr 27 +// ASM-NEXT: .amdhsa_reserve_vcc 0 +// ASM-NEXT: .amdhsa_reserve_flat_scratch 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 0 +// ASM-NEXT: .amdhsa_float_round_mode_32 1 +// ASM-NEXT: .amdhsa_float_round_mode_16_64 1 +// ASM-NEXT: .amdhsa_float_denorm_mode_32 1 +// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0 +// ASM-NEXT: .amdhsa_dx10_clamp 0 +// ASM-NEXT: .amdhsa_ieee_mode 0 +// ASM-NEXT: .amdhsa_fp16_overflow 1 +// ASM-NEXT: .amdhsa_workgroup_processor_mode 1 +// ASM-NEXT: .amdhsa_memory_ordered 1 +// ASM-NEXT: .amdhsa_forward_progress 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1 +// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1 +// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1 +// ASM-NEXT: .amdhsa_exception_int_div_zero 1 +// ASM-NEXT: .end_amdhsa_kernel + +// Test that we are including special SGPR usage in the granulated count. +.p2align 6 +.amdhsa_kernel special_sgpr + // Same next_free_sgpr as "complete", but... + .amdhsa_next_free_sgpr 27 + // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from + // 3 granules to 4 + .amdhsa_reserve_flat_scratch 1 + + .amdhsa_reserve_vcc 0 + .amdhsa_reserve_xnack_mask 0 + + .amdhsa_float_denorm_mode_16_64 0 + .amdhsa_dx10_clamp 0 + .amdhsa_ieee_mode 0 + .amdhsa_next_free_vgpr 0 +.end_amdhsa_kernel + +// ASM: .amdhsa_kernel special_sgpr +// ASM: .amdhsa_next_free_vgpr 0 +// ASM-NEXT: .amdhsa_next_free_sgpr 27 +// ASM-NEXT: .amdhsa_reserve_vcc 0 +// ASM-NEXT: .amdhsa_reserve_xnack_mask 0 +// ASM: .amdhsa_float_denorm_mode_16_64 0 +// ASM-NEXT: .amdhsa_dx10_clamp 0 +// ASM-NEXT: .amdhsa_ieee_mode 0 +// ASM: .end_amdhsa_kernel + +.section .foo + +.byte .amdgcn.gfx_generation_number +// ASM: .byte 10 + +.byte .amdgcn.next_free_vgpr +// ASM: .byte 0 +.byte .amdgcn.next_free_sgpr +// ASM: .byte 0 + +v_mov_b32_e32 v7, s10 + +.byte .amdgcn.next_free_vgpr +// ASM: .byte 8 +.byte .amdgcn.next_free_sgpr +// ASM: .byte 11 + +.set .amdgcn.next_free_vgpr, 0 +.set .amdgcn.next_free_sgpr, 0 + +.byte .amdgcn.next_free_vgpr +// ASM: .byte 0 +.byte .amdgcn.next_free_sgpr +// ASM: .byte 0 + +v_mov_b32_e32 v16, s3 + +.byte .amdgcn.next_free_vgpr +// ASM: .byte 17 +.byte .amdgcn.next_free_sgpr +// ASM: .byte 4 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10.s b/llvm/test/MC/AMDGPU/hsa-gfx10.s new file mode 100644 index 00000000000..db868f300f0 --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-gfx10.s @@ -0,0 +1,284 @@ +// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64,-code-object-v3 -show-encoding %s | FileCheck %s --check-prefix=ASM +// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64,-code-object-v3 -show-encoding %s | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF + +// ELF: Section { +// ELF: Name: .text +// ELF: Type: SHT_PROGBITS (0x1) +// ELF: Flags [ (0x6) +// ELF: SHF_ALLOC (0x2) +// ELF: SHF_EXECINSTR (0x4) + +// ELF: SHT_NOTE +// ELF: 0000: 04000000 08000000 01000000 414D4400 +// ELF: 0010: 02000000 00000000 04000000 1B000000 +// ELF: 0020: 03000000 414D4400 04000700 07000000 +// ELF: 0030: 00000000 00000000 414D4400 414D4447 +// ELF: 0040: 50550000 +// We can't check binary representation of metadata note: it is different on +// Windows and Linux because of carriage return on Windows + +// ELF: Symbol { +// ELF: Name: amd_kernel_code_t_minimal +// ELF: Type: AMDGPU_HSA_KERNEL (0xA) +// ELF: Section: .text +// ELF: } +// ELF: Symbol { +// ELF: Name: amd_kernel_code_t_test_all +// ELF: Type: AMDGPU_HSA_KERNEL (0xA) +// ELF: Section: .text +// ELF: } + +.text +// ASM: .text + +.hsa_code_object_version 2,0 +// ASM: .hsa_code_object_version 2,0 + +.hsa_code_object_isa 7,0,0,"AMD","AMDGPU" +// ASM: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" + +.amd_amdgpu_hsa_metadata + Version: [ 3, 0 ] + Kernels: + - Name: amd_kernel_code_t_test_all + SymbolName: amd_kernel_code_t_test_all@kd + - Name: amd_kernel_code_t_minimal + SymbolName: amd_kernel_code_t_minimal@kd +.end_amd_amdgpu_hsa_metadata + +// ASM: .amd_amdgpu_hsa_metadata +// ASM: Version: [ 3, 0 ] +// ASM: Kernels: +// ASM: - Name: amd_kernel_code_t_test_all +// ASM: SymbolName: 'amd_kernel_code_t_test_all@kd' +// ASM: - Name: amd_kernel_code_t_minimal +// ASM: SymbolName: 'amd_kernel_code_t_minimal@kd' +// ASM: .end_amd_amdgpu_hsa_metadata + +.amdgpu_hsa_kernel amd_kernel_code_t_test_all +.amdgpu_hsa_kernel amd_kernel_code_t_minimal + +amd_kernel_code_t_test_all: +; Test all amd_kernel_code_t members with non-default values. +.amd_kernel_code_t + kernel_code_version_major = 100 + kernel_code_version_minor = 100 + machine_kind = 0 + machine_version_major = 5 + machine_version_minor = 5 + machine_version_stepping = 5 + kernel_code_entry_byte_offset = 512 + kernel_code_prefetch_byte_size = 1 + max_scratch_backing_memory_byte_size = 1 + compute_pgm_rsrc1_vgprs = 1 + compute_pgm_rsrc1_sgprs = 1 + compute_pgm_rsrc1_priority = 1 + compute_pgm_rsrc1_float_mode = 1 + compute_pgm_rsrc1_priv = 1 + compute_pgm_rsrc1_dx10_clamp = 1 + compute_pgm_rsrc1_debug_mode = 1 + compute_pgm_rsrc1_ieee_mode = 1 + compute_pgm_rsrc1_wgp_mode = 0 + compute_pgm_rsrc1_mem_ordered = 0 + compute_pgm_rsrc1_fwd_progress = 1 + compute_pgm_rsrc2_scratch_en = 1 + compute_pgm_rsrc2_user_sgpr = 1 + compute_pgm_rsrc2_tgid_x_en = 1 + compute_pgm_rsrc2_tgid_y_en = 1 + compute_pgm_rsrc2_tgid_z_en = 1 + compute_pgm_rsrc2_tg_size_en = 1 + compute_pgm_rsrc2_tidig_comp_cnt = 1 + compute_pgm_rsrc2_excp_en_msb = 1 + compute_pgm_rsrc2_lds_size = 1 + compute_pgm_rsrc2_excp_en = 1 + enable_sgpr_private_segment_buffer = 1 + enable_sgpr_dispatch_ptr = 1 + enable_sgpr_queue_ptr = 1 + enable_sgpr_kernarg_segment_ptr = 1 + enable_sgpr_dispatch_id = 1 + enable_sgpr_flat_scratch_init = 1 + enable_sgpr_private_segment_size = 1 + enable_sgpr_grid_workgroup_count_x = 1 + enable_sgpr_grid_workgroup_count_y = 1 + enable_sgpr_grid_workgroup_count_z = 1 + enable_ordered_append_gds = 1 + private_element_size = 1 + is_ptr64 = 1 + is_dynamic_callstack = 1 + is_debug_enabled = 1 + is_xnack_enabled = 1 + workitem_private_segment_byte_size = 1 + workgroup_group_segment_byte_size = 1 + gds_segment_byte_size = 1 + kernarg_segment_byte_size = 1 + workgroup_fbarrier_count = 1 + wavefront_sgpr_count = 1 + workitem_vgpr_count = 1 + reserved_vgpr_first = 1 + reserved_vgpr_count = 1 + reserved_sgpr_first = 1 + reserved_sgpr_count = 1 + debug_wavefront_private_segment_offset_sgpr = 1 + debug_private_segment_buffer_sgpr = 1 + kernarg_segment_alignment = 5 + group_segment_alignment = 5 + private_segment_alignment = 5 + wavefront_size = 6 + call_convention = 1 + runtime_loader_kernel_symbol = 1 +.end_amd_kernel_code_t + +// ASM-LABEL: {{^}}amd_kernel_code_t_test_all: +// ASM: .amd_kernel_code_t +// ASM: amd_code_version_major = 100 +// ASM: amd_code_version_minor = 100 +// ASM: amd_machine_kind = 0 +// ASM: amd_machine_version_major = 5 +// ASM: amd_machine_version_minor = 5 +// ASM: amd_machine_version_stepping = 5 +// ASM: kernel_code_entry_byte_offset = 512 +// ASM: kernel_code_prefetch_byte_size = 1 +// ASM: granulated_workitem_vgpr_count = 1 +// ASM: granulated_wavefront_sgpr_count = 1 +// ASM: priority = 1 +// ASM: float_mode = 1 +// ASM: priv = 1 +// ASM: enable_dx10_clamp = 1 +// ASM: debug_mode = 1 +// ASM: enable_ieee_mode = 1 +// ASM: enable_wgp_mode = 0 +// ASM: enable_mem_ordered = 0 +// ASM: enable_fwd_progress = 1 +// ASM: enable_sgpr_private_segment_wave_byte_offset = 1 +// ASM: user_sgpr_count = 1 +// ASM: enable_sgpr_workgroup_id_x = 1 +// ASM: enable_sgpr_workgroup_id_y = 1 +// ASM: enable_sgpr_workgroup_id_z = 1 +// ASM: enable_sgpr_workgroup_info = 1 +// ASM: enable_vgpr_workitem_id = 1 +// ASM: enable_exception_msb = 1 +// ASM: granulated_lds_size = 1 +// ASM: enable_exception = 1 +// ASM: enable_sgpr_private_segment_buffer = 1 +// ASM: enable_sgpr_dispatch_ptr = 1 +// ASM: enable_sgpr_queue_ptr = 1 +// ASM: enable_sgpr_kernarg_segment_ptr = 1 +// ASM: enable_sgpr_dispatch_id = 1 +// ASM: enable_sgpr_flat_scratch_init = 1 +// ASM: enable_sgpr_private_segment_size = 1 +// ASM: enable_sgpr_grid_workgroup_count_x = 1 +// ASM: enable_sgpr_grid_workgroup_count_y = 1 +// ASM: enable_sgpr_grid_workgroup_count_z = 1 +// ASM: enable_ordered_append_gds = 1 +// ASM: private_element_size = 1 +// ASM: is_ptr64 = 1 +// ASM: is_dynamic_callstack = 1 +// ASM: is_debug_enabled = 1 +// ASM: is_xnack_enabled = 1 +// ASM: workitem_private_segment_byte_size = 1 +// ASM: workgroup_group_segment_byte_size = 1 +// ASM: gds_segment_byte_size = 1 +// ASM: kernarg_segment_byte_size = 1 +// ASM: workgroup_fbarrier_count = 1 +// ASM: wavefront_sgpr_count = 1 +// ASM: workitem_vgpr_count = 1 +// ASM: reserved_vgpr_first = 1 +// ASM: reserved_vgpr_count = 1 +// ASM: reserved_sgpr_first = 1 +// ASM: reserved_sgpr_count = 1 +// ASM: debug_wavefront_private_segment_offset_sgpr = 1 +// ASM: debug_private_segment_buffer_sgpr = 1 +// ASM: kernarg_segment_alignment = 5 +// ASM: group_segment_alignment = 5 +// ASM: private_segment_alignment = 5 +// ASM: wavefront_size = 6 +// ASM: call_convention = 1 +// ASM: runtime_loader_kernel_symbol = 1 +// ASM: .end_amd_kernel_code_t + +amd_kernel_code_t_minimal: +.amd_kernel_code_t + enable_sgpr_kernarg_segment_ptr = 1 + is_ptr64 = 1 + granulated_workitem_vgpr_count = 1 + granulated_wavefront_sgpr_count = 1 + user_sgpr_count = 2 + kernarg_segment_byte_size = 16 + wavefront_sgpr_count = 8 +// wavefront_sgpr_count = 7 +; wavefront_sgpr_count = 7 +// Make sure a blank line won't break anything: + +// Make sure a line with whitespace won't break anything: + + workitem_vgpr_count = 16 +.end_amd_kernel_code_t + +// ASM-LABEL: {{^}}amd_kernel_code_t_minimal: +// ASM: .amd_kernel_code_t +// ASM: amd_code_version_major = 1 +// ASM: amd_code_version_minor = 2 +// ASM: amd_machine_kind = 1 +// ASM: amd_machine_version_major = 10 +// ASM: amd_machine_version_minor = 1 +// ASM: amd_machine_version_stepping = 0 +// ASM: kernel_code_entry_byte_offset = 256 +// ASM: kernel_code_prefetch_byte_size = 0 +// ASM: granulated_workitem_vgpr_count = 1 +// ASM: granulated_wavefront_sgpr_count = 1 +// ASM: priority = 0 +// ASM: float_mode = 0 +// ASM: priv = 0 +// ASM: enable_dx10_clamp = 0 +// ASM: debug_mode = 0 +// ASM: enable_ieee_mode = 0 +// ASM: enable_wgp_mode = 1 +// ASM: enable_mem_ordered = 1 +// ASM: enable_fwd_progress = 0 +// ASM: enable_sgpr_private_segment_wave_byte_offset = 0 +// ASM: user_sgpr_count = 2 +// ASM: enable_sgpr_workgroup_id_x = 0 +// ASM: enable_sgpr_workgroup_id_y = 0 +// ASM: enable_sgpr_workgroup_id_z = 0 +// ASM: enable_sgpr_workgroup_info = 0 +// ASM: enable_vgpr_workitem_id = 0 +// ASM: enable_exception_msb = 0 +// ASM: granulated_lds_size = 0 +// ASM: enable_exception = 0 +// ASM: enable_sgpr_private_segment_buffer = 0 +// ASM: enable_sgpr_dispatch_ptr = 0 +// ASM: enable_sgpr_queue_ptr = 0 +// ASM: enable_sgpr_kernarg_segment_ptr = 1 +// ASM: enable_sgpr_dispatch_id = 0 +// ASM: enable_sgpr_flat_scratch_init = 0 +// ASM: enable_sgpr_private_segment_size = 0 +// ASM: enable_sgpr_grid_workgroup_count_x = 0 +// ASM: enable_sgpr_grid_workgroup_count_y = 0 +// ASM: enable_sgpr_grid_workgroup_count_z = 0 +// ASM: enable_wavefront_size32 = 0 +// ASM: enable_ordered_append_gds = 0 +// ASM: private_element_size = 0 +// ASM: is_ptr64 = 1 +// ASM: is_dynamic_callstack = 0 +// ASM: is_debug_enabled = 0 +// ASM: is_xnack_enabled = 0 +// ASM: workitem_private_segment_byte_size = 0 +// ASM: workgroup_group_segment_byte_size = 0 +// ASM: gds_segment_byte_size = 0 +// ASM: kernarg_segment_byte_size = 16 +// ASM: workgroup_fbarrier_count = 0 +// ASM: wavefront_sgpr_count = 8 +// ASM: workitem_vgpr_count = 16 +// ASM: reserved_vgpr_first = 0 +// ASM: reserved_vgpr_count = 0 +// ASM: reserved_sgpr_first = 0 +// ASM: reserved_sgpr_count = 0 +// ASM: debug_wavefront_private_segment_offset_sgpr = 0 +// ASM: debug_private_segment_buffer_sgpr = 0 +// ASM: kernarg_segment_alignment = 4 +// ASM: group_segment_alignment = 4 +// ASM: private_segment_alignment = 4 +// ASM: wavefront_size = 6 +// ASM: call_convention = -1 +// ASM: runtime_loader_kernel_symbol = 0 +// ASM: .end_amd_kernel_code_t diff --git a/llvm/test/MC/AMDGPU/hsa-wave-size.s b/llvm/test/MC/AMDGPU/hsa-wave-size.s new file mode 100644 index 00000000000..8785895ce18 --- /dev/null +++ b/llvm/test/MC/AMDGPU/hsa-wave-size.s @@ -0,0 +1,65 @@ +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=-code-object-v3 %s | FileCheck --check-prefixes=GCN,GFX7 %s +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,+WavefrontSize32,-WavefrontSize64 %s | FileCheck --check-prefixes=GCN,GFX10-W32 %s +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,-WavefrontSize32,+WavefrontSize64 %s | FileCheck --check-prefixes=GCN,GFX10-W64 %s + +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=-code-object-v3 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX7-ERR %s +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX10-W32-ERR %s +// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX10-W64-ERR %s + +// GCN: test0: +// GFX7: enable_wavefront_size32 = 0 +// GFX7: wavefront_size = 6 +// GFX10-W32: enable_wavefront_size32 = 1 +// GFX10-W32: wavefront_size = 5 +// GFX10-W64: enable_wavefront_size32 = 0 +// GFX10-W64: wavefront_size = 6 +.amdgpu_hsa_kernel test0 +test0: +.amd_kernel_code_t +.end_amd_kernel_code_t + +// GCN: test1: +// GFX7: enable_wavefront_size32 = 0 +// GFX7: wavefront_size = 6 +// GFX10-W32-ERR: error: enable_wavefront_size32=0 requires +WavefrontSize64 +// GFX10-W64: enable_wavefront_size32 = 0 +// GFX10-W64: wavefront_size = 6 +.amdgpu_hsa_kernel test1 +test1: +.amd_kernel_code_t + enable_wavefront_size32 = 0 +.end_amd_kernel_code_t + +// GCN: test2: +// GFX7: enable_wavefront_size32 = 0 +// GFX7: wavefront_size = 6 +// GFX10-W32-ERR: error: wavefront_size=6 requires +WavefrontSize64 +// GFX10-W64: enable_wavefront_size32 = 0 +// GFX10-W64: wavefront_size = 6 +.amdgpu_hsa_kernel test2 +test2: +.amd_kernel_code_t + wavefront_size = 6 +.end_amd_kernel_code_t + +// GCN: test3: +// GFX7-ERR: error: enable_wavefront_size32=1 is only allowed on GFX10+ +// GFX10-W32: enable_wavefront_size32 = 1 +// GFX10-W32: wavefront_size = 5 +// GFX10-W64-ERR: error: enable_wavefront_size32=1 requires +WavefrontSize32 +.amdgpu_hsa_kernel test3 +test3: +.amd_kernel_code_t + enable_wavefront_size32 = 1 +.end_amd_kernel_code_t + +// GCN: test4: +// GFX7-ERR: error: wavefront_size=5 is only allowed on GFX10+ +// GFX10-W32: enable_wavefront_size32 = 1 +// GFX10-W32: wavefront_size = 5 +// GFX10-W64-ERR: error: wavefront_size=5 requires +WavefrontSize32 +.amdgpu_hsa_kernel test4 +test4: +.amd_kernel_code_t + wavefront_size = 5 +.end_amd_kernel_code_t diff --git a/llvm/test/MC/AMDGPU/hsa.s b/llvm/test/MC/AMDGPU/hsa.s index 9eda20da063..87c09648e51 100644 --- a/llvm/test/MC/AMDGPU/hsa.s +++ b/llvm/test/MC/AMDGPU/hsa.s @@ -120,7 +120,7 @@ amd_kernel_code_t_test_all: kernarg_segment_alignment = 5 group_segment_alignment = 5 private_segment_alignment = 5 - wavefront_size = 5 + wavefront_size = 6 call_convention = 1 runtime_loader_kernel_symbol = 1 .end_amd_kernel_code_t @@ -185,7 +185,7 @@ amd_kernel_code_t_test_all: // ASM: kernarg_segment_alignment = 5 // ASM: group_segment_alignment = 5 // ASM: private_segment_alignment = 5 -// ASM: wavefront_size = 5 +// ASM: wavefront_size = 6 // ASM: call_convention = 1 // ASM: runtime_loader_kernel_symbol = 1 // ASM: .end_amd_kernel_code_t diff --git a/llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s b/llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s index ddd76fcf918..0a5686e27e8 100644 --- a/llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s +++ b/llvm/test/MC/AMDGPU/hsa_isa_version_attrs.s @@ -1,6 +1,8 @@ // RUN: llvm-mc -arch=amdgcn -mcpu=gfx801 -mattr=-code-object-v3,-fast-fmaf -show-encoding %s | FileCheck --check-prefix=GFX8 %s // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-code-object-v3,-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s +// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-code-object-v3,-WavefrontSize32 -show-encoding %s | FileCheck --check-prefix=GFX10 %s .hsa_code_object_isa // GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" // GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU" +// GFX10: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU" |