summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/addrspacecast.ll 59
-rw-r--r-- llvm/test/CodeGen/AMDGPU/debugger-reserve-regs.ll 1
-rw-r--r-- llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir 214
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll 9
5 files changed, 268 insertions, 19 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
index b6ceee32c72..12dcda95986 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -1,14 +1,19 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=CI %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=GFX9 %s
; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
-; HSA: enable_sgpr_queue_ptr = 1
+; CI: enable_sgpr_queue_ptr = 1
+; GFX9: enable_sgpr_queue_ptr = 0
-; HSA-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
-; HSA-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
+; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
+; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
+; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
+
+; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
+; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base
-; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
@@ -17,6 +22,12 @@
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]
+
+; NumSgprs should have at most 2 digits. Make sure src_shared_base is not
+; counted as a high-numbered SGPR.
+
+; CI: NumSgprs: {{[0-9][0-9]+}}
+; GFX9: NumSgprs: {{[0-9]+}}
define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
store volatile i32 7, i32 addrspace(4)* %stof
@@ -26,12 +37,16 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
-; HSA: enable_sgpr_queue_ptr = 1
+; CI: enable_sgpr_queue_ptr = 1
+; GFX9: enable_sgpr_queue_ptr = 0
-; HSA-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
-; HSA-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
+; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
+; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
+; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
+
+; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
+; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base
-; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
@@ -40,6 +55,9 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]
+
+; CI: NumSgprs: {{[0-9][0-9]+}}
+; GFX9: NumSgprs: {{[0-9]+}}
define void @use_private_to_flat_addrspacecast(i32* %ptr) #0 {
%stof = addrspacecast i32* %ptr to i32 addrspace(4)*
store volatile i32 7, i32 addrspace(4)* %stof
@@ -133,8 +151,10 @@ define void @use_flat_to_constant_addrspacecast(i32 addrspace(4)* %ptr) #0 {
}
; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
-; HSA: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
-; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
+; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
+; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
+; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base
+
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
@@ -176,8 +196,11 @@ define void @cast_neg1_flat_to_group_addrspacecast() #0 {
}
; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
-; HSA: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
-; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
+; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
+; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
+
+; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], src_private_base
+
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
@@ -226,9 +249,13 @@ end:
; Check for prologue initializing special SGPRs pointing to scratch.
; HSA-LABEL: {{^}}store_flat_scratch:
-; HSA-DAG: s_mov_b32 flat_scratch_lo, s9
-; HSA-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11
-; HSA: s_lshr_b32 flat_scratch_hi, [[ADD]], 8
+; CI-DAG: s_mov_b32 flat_scratch_lo, s9
+; CI-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11
+; CI: s_lshr_b32 flat_scratch_hi, [[ADD]], 8
+
+; GFX9: s_add_u32 flat_scratch_lo, s6, s9
+; GFX9: s_addc_u32 flat_scratch_hi, s7, 0
+
; HSA: flat_store_dword
; HSA: s_barrier
; HSA: flat_load_dword
diff --git a/llvm/test/CodeGen/AMDGPU/debugger-reserve-regs.ll b/llvm/test/CodeGen/AMDGPU/debugger-reserve-regs.ll
index d30bb20bb03..d63ba149ba6 100644
--- a/llvm/test/CodeGen/AMDGPU/debugger-reserve-regs.ll
+++ b/llvm/test/CodeGen/AMDGPU/debugger-reserve-regs.ll
@@ -1,4 +1,5 @@
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-reserve-regs -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=+amdgpu-debugger-reserve-regs -verify-machineinstrs < %s | FileCheck %s
; CHECK: reserved_vgpr_first = {{[0-9]+}}
; CHECK-NEXT: reserved_vgpr_count = 4
; CHECK: ReservedVGPRFirst: {{[0-9]+}}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll b/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll
index a4e599230b7..af63a4f8df7 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll
@@ -13,6 +13,8 @@
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx804 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI804 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx901 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX901 %s
; HSA: .hsa_code_object_version 2,1
; HSA-CI700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
@@ -24,3 +26,5 @@
; HSA-VI803: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
; HSA-VI804: .hsa_code_object_isa 8,0,4,"AMD","AMDGPU"
; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU"
+; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
+; HSA-GFX901: .hsa_code_object_isa 9,0,1,"AMD","AMDGPU"
diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
index 85cd903a405..e97cb1b5c34 100644
--- a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
+++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
@@ -1,6 +1,7 @@
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI,GFX9
--- |
define void @div_fmas() { ret void }
@@ -9,6 +10,37 @@
define void @vmem_gt_8dw_store() { ret void }
define void @readwrite_lane() { ret void }
define void @rfe() { ret void }
+ define void @s_mov_fed_b32() { ret void }
+ define void @s_movrel() { ret void }
+ define void @v_interp() { ret void }
+
+ define void @mov_fed_hazard_crash_on_dbg_value(i32 addrspace(1)* %A) {
+ entry:
+ %A.addr = alloca i32 addrspace(1)*, align 4
+ store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !5, metadata !11), !dbg !12
+ ret void
+ }
+
+ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!3, !4}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 268929)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+ !1 = !DIFile(filename: "test01.cl", directory: "/dev/null")
+ !2 = !{}
+ !3 = !{i32 2, !"Dwarf Version", i32 2}
+ !4 = !{i32 2, !"Debug Info Version", i32 3}
+ !5 = !DILocalVariable(name: "A", arg: 1, scope: !6, file: !1, line: 1, type: !9)
+ !6 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+ !7 = !DISubroutineType(types: !8)
+ !8 = !{null, !9}
+ !9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 32)
+ !10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+ !11 = !DIExpression()
+ !12 = !DILocation(line: 1, column: 30, scope: !6)
+
...
---
# GCN-LABEL: name: div_fmas
@@ -331,3 +363,185 @@ body: |
S_ENDPGM
...
+
+...
+---
+
+# GCN-LABEL: name: s_mov_fed_b32
+
+# GCN-LABEL: bb.0:
+# GCN: S_MOV_FED_B32
+# GFX9: S_NOP
+# GCN-NEXT: S_MOV_B32
+
+# GCN-LABEL: bb.1:
+# GCN: S_MOV_FED_B32
+# GFX9: S_NOP
+# GCN-NEXT: V_MOV_B32
+name: s_mov_fed_b32
+
+body: |
+ bb.0:
+ successors: %bb.1
+ %sgpr0 = S_MOV_FED_B32 %sgpr0
+ %sgpr0 = S_MOV_B32 %sgpr0
+ S_BRANCH %bb.1
+
+ bb.1:
+ %sgpr0 = S_MOV_FED_B32 %sgpr0
+ %vgpr0 = V_MOV_B32_e32 %sgpr0, implicit %exec
+ S_ENDPGM
+
+...
+
+...
+---
+
+# GCN-LABEL: name: s_movrel
+
+# GCN-LABEL: bb.0:
+# GCN: S_MOV_B32
+# GFX9: S_NOP
+# GCN-NEXT: S_MOVRELS_B32
+
+# GCN-LABEL: bb.1:
+# GCN: S_MOV_B32
+# GFX9: S_NOP
+# GCN-NEXT: S_MOVRELS_B64
+
+# GCN-LABEL: bb.2:
+# GCN: S_MOV_B32
+# GFX9: S_NOP
+# GCN-NEXT: S_MOVRELD_B32
+
+# GCN-LABEL: bb.3:
+# GCN: S_MOV_B32
+# GFX9: S_NOP
+# GCN-NEXT: S_MOVRELD_B64
+
+name: s_movrel
+
+body: |
+ bb.0:
+ successors: %bb.1
+ %m0 = S_MOV_B32 0
+ %sgpr0 = S_MOVRELS_B32 %sgpr0, implicit %m0
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2
+ %m0 = S_MOV_B32 0
+ %sgpr0_sgpr1 = S_MOVRELS_B64 %sgpr0_sgpr1, implicit %m0
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3
+ %m0 = S_MOV_B32 0
+ %sgpr0 = S_MOVRELD_B32 %sgpr0, implicit %m0
+ S_BRANCH %bb.3
+
+ bb.3:
+ %m0 = S_MOV_B32 0
+ %sgpr0_sgpr1 = S_MOVRELD_B64 %sgpr0_sgpr1, implicit %m0
+ S_ENDPGM
+...
+
+...
+---
+
+# GCN-LABEL: name: v_interp
+
+# GCN-LABEL: bb.0:
+# GCN: S_MOV_B32
+# GFX9: S_NOP
+# GCN-NEXT: V_INTERP_P1_F32
+
+# GCN-LABEL: bb.1:
+# GCN: S_MOV_B32
+# GFX9: S_NOP
+# GCN-NEXT: V_INTERP_P2_F32
+
+# GCN-LABEL: bb.2:
+# GCN: S_MOV_B32
+# GFX9: S_NOP
+# GCN-NEXT: V_INTERP_P1_F32_16bank
+
+# GCN-LABEL: bb.3:
+# GCN: S_MOV_B32
+# GFX9: S_NOP
+# GCN-NEXT: V_INTERP_MOV_F32
+
+name: v_interp
+
+body: |
+ bb.0:
+ successors: %bb.1
+ %m0 = S_MOV_B32 0
+ %vgpr0 = V_INTERP_P1_F32 %vgpr0, 0, 0, implicit %m0, implicit %exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2
+ %m0 = S_MOV_B32 0
+ %vgpr0 = V_INTERP_P2_F32 %vgpr0, %vgpr1, 0, 0, implicit %m0, implicit %exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3
+ %m0 = S_MOV_B32 0
+ %vgpr0 = V_INTERP_P1_F32_16bank %vgpr0, 0, 0, implicit %m0, implicit %exec
+ S_BRANCH %bb.3
+
+ bb.3:
+ %m0 = S_MOV_B32 0
+ %vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit %m0, implicit %exec
+ S_ENDPGM
+...
+---
+name: mov_fed_hazard_crash_on_dbg_value
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+liveins:
+ - { reg: '%sgpr4_sgpr5' }
+ - { reg: '%sgpr6_sgpr7' }
+ - { reg: '%sgpr9' }
+ - { reg: '%sgpr0_sgpr1_sgpr2_sgpr3' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 16
+ offsetAdjustment: 0
+ maxAlignment: 8
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+stack:
+ - { id: 0, name: A.addr, offset: 0, size: 8, alignment: 8, local-offset: 0 }
+ - { id: 1, offset: 8, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ liveins: %sgpr4_sgpr5, %sgpr6_sgpr7, %sgpr9, %sgpr0_sgpr1_sgpr2_sgpr3
+
+ %flat_scr_lo = S_ADD_U32 %sgpr6, %sgpr9, implicit-def %scc
+ %flat_scr_hi = S_ADDC_U32 %sgpr7, 0, implicit-def %scc, implicit %scc
+ DBG_VALUE _, 2, !5, !11, debug-location !12
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ dead %sgpr6_sgpr7 = KILL %sgpr4_sgpr5
+ %sgpr8 = S_MOV_B32 %sgpr5
+ %vgpr0 = V_MOV_B32_e32 killed %sgpr8, implicit %exec
+ BUFFER_STORE_DWORD_OFFSET %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr9, 4, 0, 0, 0, implicit %exec :: (store 4 into %ir.A.addr + 4)
+ %sgpr8 = S_MOV_B32 %sgpr4, implicit killed %sgpr4_sgpr5
+ %vgpr0 = V_MOV_B32_e32 killed %sgpr8, implicit %exec
+ BUFFER_STORE_DWORD_OFFSET %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr9, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.A.addr)
+ S_ENDPGM
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index 132e476d5e2..9559b5a84b1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -1,8 +1,11 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; GCN-LABEL: {{^}}test_barrier:
-; GCN: buffer_store_dword
-; GCN: s_waitcnt
+; GFX8: buffer_store_dword
+; GFX8: s_waitcnt
+; GFX9: flat_store_dword
+; GFX9-NOT: s_waitcnt
; GCN: s_barrier
define void @test_barrier(i32 addrspace(1)* %out) #0 {
entry:
OpenPOWER on IntegriCloud