summary refs log tree commit diff stats
path: root/llvm/test/CodeGen/AMDGPU
diff options
context:
space:
mode:
author Tim Renouf <tpr.llvm@botech.co.uk> 2018-02-28 19:10:32 +0000
committer Tim Renouf <tpr.llvm@botech.co.uk> 2018-02-28 19:10:32 +0000
commit 2a99fa2c084b1439c7473032f4af0808d5faabc6 (patch)
tree 7aba74c5537d3f499d2a2e62e5d30fb0eba8425d /llvm/test/CodeGen/AMDGPU
parent d319674a81ad579d2067013101e50df8ca1cd7d1 (diff)
download bcm5719-llvm-2a99fa2c084b1439c7473032f4af0808d5faabc6.tar.gz
bcm5719-llvm-2a99fa2c084b1439c7473032f4af0808d5faabc6.zip
[AMDGPU] added writelane intrinsic
Summary:
For use by LLPC SPV_AMD_shader_ballot extension.

The v_writelane instruction was already implemented for use by SGPR spilling, but I had to add an extra dummy operand tied to the destination, to represent that all lanes except the selected one keep the old value of the destination register.

.ll test changes were due to schedule changes caused by that new operand.

Differential Revision: https://reviews.llvm.org/D42838

llvm-svn: 326353
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll 20
-rw-r--r-- llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll 82
-rw-r--r-- llvm/test/CodeGen/AMDGPU/sibling-call.ll 2
5 files changed, 95 insertions, 15 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
index f164d8179ed..b56ec379bf1 100644
--- a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
@@ -33,16 +33,14 @@ entry:
; GCN-DAG: buffer_store_dword v32
; GCN-DAG: buffer_store_dword v33
; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32
-; GCN: v_writelane_b32
-
+; GCN-DAG: v_writelane_b32
; GCN-DAG: s_add_u32 s32, s32, 0xb00{{$}}
-
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
-; GCN: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
-; GCN: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}}
+; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
+; GCN-DAG: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}}
-; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
-; GCN: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]
+; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
+; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]
; GCN: s_swappc_b64
@@ -80,10 +78,10 @@ entry:
; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8
; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24
-; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8
-; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12
-; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
-; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20
+; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8
+; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12
+; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
+; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20
; GCN-NOT: s_add_u32 s32, s32, 0x800
diff --git a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
index 2c8abf50090..14a6193b558 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
@@ -44,7 +44,7 @@ define void @callee_with_stack() #0 {
; GCN-DAG: v_writelane_b32 v32, s35,
; GCN-DAG: s_add_u32 s32, s32, 0x300{{$}}
; GCN-DAG: v_mov_b32_e32 v0, 0{{$}}
-; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
+; GCN-DAG: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
; GCN-DAG: s_mov_b32 s33, s5
diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
index e39fb65a2a1..f83fd1c8f47 100644
--- a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
+++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir
@@ -308,7 +308,7 @@ body: |
bb.1:
$vgpr0,$sgpr0_sgpr1 = V_ADD_I32_e64 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
- $vgpr4 = V_WRITELANE_B32 $sgpr0, $sgpr0
+ $vgpr4 = V_WRITELANE_B32 $sgpr0, $sgpr0, $vgpr4
S_BRANCH %bb.2
bb.2:
@@ -318,7 +318,7 @@ body: |
bb.3:
$vgpr0,implicit $vcc = V_ADD_I32_e32 $vgpr1, $vgpr2, implicit $vcc, implicit $exec
- $vgpr4 = V_WRITELANE_B32 $sgpr4, $vcc_lo
+ $vgpr4 = V_WRITELANE_B32 $sgpr4, $vcc_lo, $vgpr4
S_ENDPGM
...
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
new file mode 100644
index 00000000000..361756a013b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
@@ -0,0 +1,82 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck %s
+
+declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #0
+
+; CHECK-LABEL: {{^}}test_writelane_sreg:
+; CHECK: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
+define amdgpu_kernel void @test_writelane_sreg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 {
+ %oldval = load i32, i32 addrspace(1)* %out
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 %oldval)
+ store i32 %writelane, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_writelane_imm_sreg:
+; CHECK: v_writelane_b32 v{{[0-9]+}}, 32, s{{[0-9]+}}
+define amdgpu_kernel void @test_writelane_imm_sreg(i32 addrspace(1)* %out, i32 %src1) #1 {
+ %oldval = load i32, i32 addrspace(1)* %out
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 32, i32 %src1, i32 %oldval)
+ store i32 %writelane, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_writelane_vreg_lane:
+; CHECK: v_readfirstlane_b32 [[LANE:s[0-9]+]], v{{[0-9]+}}
+; CHECK: v_writelane_b32 v{{[0-9]+}}, 12, [[LANE]]
+define amdgpu_kernel void @test_writelane_vreg_lane(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #1 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep.in = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 %tid
+ %args = load <2 x i32>, <2 x i32> addrspace(1)* %gep.in
+ %oldval = load i32, i32 addrspace(1)* %out
+ %lane = extractelement <2 x i32> %args, i32 1
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 12, i32 %lane, i32 %oldval)
+ store i32 %writelane, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; TODO: m0 should be folded.
+; CHECK-LABEL: {{^}}test_writelane_m0_sreg:
+; CHECK: s_mov_b32 m0, -1
+; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
+; CHECK: v_writelane_b32 v{{[0-9]+}}, [[COPY_M0]], s{{[0-9]+}}
+define amdgpu_kernel void @test_writelane_m0_sreg(i32 addrspace(1)* %out, i32 %src1) #1 {
+ %oldval = load i32, i32 addrspace(1)* %out
+ %m0 = call i32 asm "s_mov_b32 m0, -1", "={M0}"()
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 %m0, i32 %src1, i32 %oldval)
+ store i32 %writelane, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_writelane_imm:
+; CHECK: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 32
+define amdgpu_kernel void @test_writelane_imm(i32 addrspace(1)* %out, i32 %src0) #1 {
+ %oldval = load i32, i32 addrspace(1)* %out
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 32, i32 %oldval) #0
+ store i32 %writelane, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_writelane_sreg_oldval:
+; CHECK: v_mov_b32_e32 [[OLDVAL:v[0-9]+]], s{{[0-9]+}}
+; CHECK: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, s{{[0-9]+}}
+define amdgpu_kernel void @test_writelane_sreg_oldval(i32 inreg %oldval, i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 {
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 %oldval)
+ store i32 %writelane, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_writelane_imm_oldval:
+; CHECK: v_mov_b32_e32 [[OLDVAL:v[0-9]+]], 42
+; CHECK: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, s{{[0-9]+}}
+define amdgpu_kernel void @test_writelane_imm_oldval(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 {
+ %writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 42)
+ store i32 %writelane, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #2
+
+attributes #0 = { nounwind readnone convergent }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index f7e8a1d80e9..1c0076c9cd9 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -216,7 +216,7 @@ entry:
; GCN-DAG: v_writelane_b32 v34, s35, 2
; GCN-DAG: s_add_u32 s32, s32, 0x400
-; GCN: s_getpc_b64
+; GCN-DAG: s_getpc_b64
; GCN: s_swappc_b64
; GCN: s_getpc_b64 s[6:7]
OpenPOWER on IntegriCloud