diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/addrspacecast.ll | 59 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/debugger-reserve-regs.ll | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir | 214 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll | 9 |
5 files changed, 268 insertions, 19 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll index b6ceee32c72..12dcda95986 100644 --- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -1,14 +1,19 @@ -; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=CI %s +; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=GFX9 %s ; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast: ; HSA: enable_sgpr_private_segment_buffer = 1 ; HSA: enable_sgpr_dispatch_ptr = 0 -; HSA: enable_sgpr_queue_ptr = 1 +; CI: enable_sgpr_queue_ptr = 1 +; GFX9: enable_sgpr_queue_ptr = 0 -; HSA-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}} -; HSA-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}} +; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}} +; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}} +; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]] + +; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}} +; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base -; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]] ; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1 @@ -17,6 +22,12 @@ ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 ; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]] + +; At most 2 digits. Make sure src_shared_base is not counted as a high +; number SGPR. + +; CI: NumSgprs: {{[0-9][0-9]+}} +; GFX9: NumSgprs: {{[0-9]+}} define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 { %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)* store volatile i32 7, i32 addrspace(4)* %stof @@ -26,12 +37,16 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 { ; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast: ; HSA: enable_sgpr_private_segment_buffer = 1 ; HSA: enable_sgpr_dispatch_ptr = 0 -; HSA: enable_sgpr_queue_ptr = 1 +; CI: enable_sgpr_queue_ptr = 1 +; GFX9: enable_sgpr_queue_ptr = 0 -; HSA-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}} -; HSA-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}} +; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}} +; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}} +; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]] + +; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}} +; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base -; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]] ; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]] ; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1 @@ -40,6 +55,9 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 { ; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7 ; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]] + +; CI: NumSgprs: {{[0-9][0-9]+}} +; GFX9: NumSgprs: {{[0-9]+}} define void @use_private_to_flat_addrspacecast(i32* %ptr) #0 { %stof = addrspacecast i32* %ptr to i32 addrspace(4)* store volatile i32 7, i32 addrspace(4)* %stof @@ -133,8 +151,10 @@ define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #0 { } ; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast: -; HSA: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10 -; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]] +; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10 +; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]] +; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base + ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} ; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] @@ -176,8 +196,11 @@ define void @cast_neg1_flat_to_group_addrspacecast() #0 { } ; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast: -; HSA: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11 -; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]] +; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11 +; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]] + +; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], src_private_base + ; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}} ; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}} ; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]] @@ -226,9 +249,13 @@ end: ; Check for prologue initializing special SGPRs pointing to scratch. ; HSA-LABEL: {{^}}store_flat_scratch: -; HSA-DAG: s_mov_b32 flat_scratch_lo, s9 -; HSA-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11 -; HSA: s_lshr_b32 flat_scratch_hi, [[ADD]], 8 +; CI-DAG: s_mov_b32 flat_scratch_lo, s9 +; CI-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11 +; CI: s_lshr_b32 flat_scratch_hi, [[ADD]], 8 + +; GFX9: s_add_u32 flat_scratch_lo, s6, s9 +; GFX9: s_addc_u32 flat_scratch_hi, s7, 0 + ; HSA: flat_store_dword ; HSA: s_barrier ; HSA: flat_load_dword diff --git a/llvm/test/CodeGen/AMDGPU/debugger-reserve-regs.ll b/llvm/test/CodeGen/AMDGPU/debugger-reserve-regs.ll index d30bb20bb03..d63ba149ba6 100644 --- a/llvm/test/CodeGen/AMDGPU/debugger-reserve-regs.ll +++ b/llvm/test/CodeGen/AMDGPU/debugger-reserve-regs.ll @@ -1,4 +1,5 @@ ; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-reserve-regs -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=+amdgpu-debugger-reserve-regs -verify-machineinstrs < %s | FileCheck %s ; CHECK: reserved_vgpr_first = {{[0-9]+}} ; CHECK-NEXT: reserved_vgpr_count = 4 ; CHECK: ReservedVGPRFirst: {{[0-9]+}} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll b/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll index a4e599230b7..af63a4f8df7 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll @@ -13,6 +13,8 @@ ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx804 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI804 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s +; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx901 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX901 %s ; HSA: .hsa_code_object_version 2,1 ; HSA-CI700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU" @@ -24,3 +26,5 @@ ; HSA-VI803: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU" ; HSA-VI804: .hsa_code_object_isa 8,0,4,"AMD","AMDGPU" ; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU" +; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU" +; HSA-GFX901: .hsa_code_object_isa 9,0,1,"AMD","AMDGPU" diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir index 85cd903a405..e97cb1b5c34 100644 --- a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir +++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -1,6 +1,7 @@ # RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN # RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI # RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI,GFX9 --- | define void @div_fmas() { ret void } @@ -9,6 +10,37 @@ define void @vmem_gt_8dw_store() { ret void } define void @readwrite_lane() { ret void } define void @rfe() { ret void } + define void @s_mov_fed_b32() { ret void } + define void @s_movrel() { ret void } + define void @v_interp() { ret void } + + define void @mov_fed_hazard_crash_on_dbg_value(i32 addrspace(1)* %A) { + entry: + %A.addr = alloca i32 addrspace(1)*, align 4 + store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4 + call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !5, metadata !11), !dbg !12 + ret void + } + + declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!3, !4} + + !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 268929)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) + !1 = !DIFile(filename: "test01.cl", directory: "/dev/null") + !2 = !{} + !3 = !{i32 2, !"Dwarf Version", i32 2} + !4 = !{i32 2, !"Debug Info Version", i32 3} + !5 = !DILocalVariable(name: "A", arg: 1, scope: !6, file: !1, line: 1, type: !9) + !6 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) + !7 = !DISubroutineType(types: !8) + !8 = !{null, !9} + !9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 32) + !10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) + !11 = !DIExpression() + !12 = !DILocation(line: 1, column: 30, scope: !6) + ... --- # GCN-LABEL: name: div_fmas @@ -331,3 +363,185 @@ body: | S_ENDPGM ... + +... +--- + +# GCN-LABEL: name: s_mov_fed_b32 + +# GCN-LABEL: bb.0: +# GCN: S_MOV_FED_B32 +# GFX9: S_NOP +# GCN-NEXT: S_MOV_B32 + +# GCN-LABEL: bb.1: +# GCN: S_MOV_FED_B32 +# GFX9: S_NOP +# GCN-NEXT: V_MOV_B32 +name: s_mov_fed_b32 + +body: | + bb.0: + successors: %bb.1 + %sgpr0 = S_MOV_FED_B32 %sgpr0 + %sgpr0 = S_MOV_B32 %sgpr0 + S_BRANCH %bb.1 + + bb.1: + %sgpr0 = S_MOV_FED_B32 %sgpr0 + %vgpr0 = V_MOV_B32_e32 %sgpr0, implicit %exec + S_ENDPGM + +... + +... +--- + +# GCN-LABEL: name: s_movrel + +# GCN-LABEL: bb.0: +# GCN: S_MOV_B32 +# GFX9: S_NOP +# GCN-NEXT: S_MOVRELS_B32 + +# GCN-LABEL: bb.1: +# GCN: S_MOV_B32 +# GFX9: S_NOP +# GCN-NEXT: S_MOVRELS_B64 + +# GCN-LABEL: bb.2: +# GCN: S_MOV_B32 +# GFX9: S_NOP +# GCN-NEXT: S_MOVRELD_B32 + +# GCN-LABEL: bb.3: +# GCN: S_MOV_B32 +# GFX9: S_NOP +# GCN-NEXT: S_MOVRELD_B64 + +name: s_movrel + +body: | + bb.0: + successors: %bb.1 + %m0 = S_MOV_B32 0 + %sgpr0 = S_MOVRELS_B32 %sgpr0, implicit %m0 + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2 + %m0 = S_MOV_B32 0 + %sgpr0_sgpr1 = S_MOVRELS_B64 %sgpr0_sgpr1, implicit %m0 + S_BRANCH %bb.2 + + bb.2: + successors: %bb.3 + %m0 = S_MOV_B32 0 + %sgpr0 = S_MOVRELD_B32 %sgpr0, implicit %m0 + S_BRANCH %bb.3 + + bb.3: + %m0 = S_MOV_B32 0 + %sgpr0_sgpr1 = S_MOVRELD_B64 %sgpr0_sgpr1, implicit %m0 + S_ENDPGM +... + +... +--- + +# GCN-LABEL: name: v_interp + +# GCN-LABEL: bb.0: +# GCN: S_MOV_B32 +# GFX9: S_NOP +# GCN-NEXT: V_INTERP_P1_F32 + +# GCN-LABEL: bb.1: +# GCN: S_MOV_B32 +# GFX9: S_NOP +# GCN-NEXT: V_INTERP_P2_F32 + +# GCN-LABEL: bb.2: +# GCN: S_MOV_B32 +# GFX9: S_NOP +# GCN-NEXT: V_INTERP_P1_F32_16bank + +# GCN-LABEL: bb.3: +# GCN: S_MOV_B32 +# GFX9: S_NOP +# GCN-NEXT: V_INTERP_MOV_F32 + +name: v_interp + +body: | + bb.0: + successors: %bb.1 + %m0 = S_MOV_B32 0 + %vgpr0 = V_INTERP_P1_F32 %vgpr0, 0, 0, implicit %m0, implicit %exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2 + %m0 = S_MOV_B32 0 + %vgpr0 = V_INTERP_P2_F32 %vgpr0, %vgpr1, 0, 0, implicit %m0, implicit %exec + S_BRANCH %bb.2 + + bb.2: + successors: %bb.3 + %m0 = S_MOV_B32 0 + %vgpr0 = V_INTERP_P1_F32_16bank %vgpr0, 0, 0, implicit %m0, implicit %exec + S_BRANCH %bb.3 + + bb.3: + %m0 = S_MOV_B32 0 + %vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit %m0, implicit %exec + S_ENDPGM +... +--- +name: mov_fed_hazard_crash_on_dbg_value +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%sgpr4_sgpr5' } + - { reg: '%sgpr6_sgpr7' } + - { reg: '%sgpr9' } + - { reg: '%sgpr0_sgpr1_sgpr2_sgpr3' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 16 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +stack: + - { id: 0, name: A.addr, offset: 0, size: 8, alignment: 8, local-offset: 0 } + - { id: 1, offset: 8, size: 4, alignment: 4 } +body: | + bb.0.entry: + liveins: %sgpr4_sgpr5, %sgpr6_sgpr7, %sgpr9, %sgpr0_sgpr1_sgpr2_sgpr3 + + %flat_scr_lo = S_ADD_U32 %sgpr6, %sgpr9, implicit-def %scc + %flat_scr_hi = S_ADDC_U32 %sgpr7, 0, implicit-def %scc, implicit %scc + DBG_VALUE _, 2, !5, !11, debug-location !12 + %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) + dead %sgpr6_sgpr7 = KILL %sgpr4_sgpr5 + %sgpr8 = S_MOV_B32 %sgpr5 + %vgpr0 = V_MOV_B32_e32 killed %sgpr8, implicit %exec + BUFFER_STORE_DWORD_OFFSET %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr9, 4, 0, 0, 0, implicit %exec :: (store 4 into %ir.A.addr + 4) + %sgpr8 = S_MOV_B32 %sgpr4, implicit killed %sgpr4_sgpr5 + %vgpr0 = V_MOV_B32_e32 killed %sgpr8, implicit %exec + BUFFER_STORE_DWORD_OFFSET %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr9, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.A.addr) + S_ENDPGM + +... diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll index 132e476d5e2..9559b5a84b1 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll @@ -1,8 +1,11 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s ; GCN-LABEL: {{^}}test_barrier: -; GCN: buffer_store_dword -; GCN: s_waitcnt +; GFX8: buffer_store_dword +; GFX8: s_waitcnt +; GFX9: flat_store_dword +; GFX9-NOT: s_waitcnt ; GCN: s_barrier define void @test_barrier(i32 addrspace(1)* %out) #0 { entry: |