summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-12-05 20:23:10 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-12-05 20:23:10 +0000
commit7bee6ac798f2c547753dd867e130ec587f201483 (patch)
treeda0fea7e1f415a0dbada331fc836c3e2ca547240 /llvm/test/CodeGen
parentdf87d070c917029bd0209408fcfe833d149bcca7 (diff)
downloadbcm5719-llvm-7bee6ac798f2c547753dd867e130ec587f201483.tar.gz
bcm5719-llvm-7bee6ac798f2c547753dd867e130ec587f201483.zip
AMDGPU: Refactor exp instructions
Structure the definitions a bit more like the other classes. The main change here is to split EXP with the done bit set to a separate opcode, so we can set mayLoad = 1 so that it won't be reordered before the other exp stores, since this has the special constraint that if the done bit is set then this should be the last exp in the shader. Previously all exp instructions were inferred to have unmodeled side effects. llvm-svn: 288695
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/AMDGPU/ret.ll8
-rw-r--r--llvm/test/CodeGen/MIR/AMDGPU/insert-waits-exp.mir63
-rw-r--r--llvm/test/CodeGen/MIR/AMDGPU/movrels-bug.mir2
3 files changed, 68 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/ret.ll b/llvm/test/CodeGen/AMDGPU/ret.ll
index 0408413f547..0bdecc96caf 100644
--- a/llvm/test/CodeGen/AMDGPU/ret.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret.ll
@@ -6,7 +6,7 @@ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float
; GCN-LABEL: {{^}}vgpr:
; GCN: v_mov_b32_e32 v1, v0
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
-; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
+; GCN-DAG: exp 15, 0, -1, 1, -1, v1, v1, v1, v1
; GCN: s_waitcnt expcnt(0)
; GCN-NOT: s_endpgm
define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) {
@@ -19,7 +19,7 @@ define amdgpu_vs {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 i
; GCN-LABEL: {{^}}vgpr_literal:
; GCN: v_mov_b32_e32 v4, v0
-; GCN: exp 15, 0, 1, 1, 1, v4, v4, v4, v4
+; GCN: exp 15, 0, -1, 1, -1, v4, v4, v4, v4
; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: v_mov_b32_e32 v1, 2.0
; GCN-DAG: v_mov_b32_e32 v2, 4.0
@@ -209,7 +209,7 @@ define amdgpu_vs {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)
; GCN-LABEL: {{^}}both:
; GCN: v_mov_b32_e32 v1, v0
-; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
+; GCN-DAG: exp 15, 0, -1, 1, -1, v1, v1, v1, v1
; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
; GCN-DAG: s_add_i32 s0, s3, 2
; GCN-DAG: s_mov_b32 s1, s2
@@ -231,7 +231,7 @@ define amdgpu_vs {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2
; GCN-LABEL: {{^}}structure_literal:
; GCN: v_mov_b32_e32 v3, v0
-; GCN: exp 15, 0, 1, 1, 1, v3, v3, v3, v3
+; GCN: exp 15, 0, -1, 1, -1, v3, v3, v3, v3
; GCN-DAG: v_mov_b32_e32 v0, 1.0
; GCN-DAG: s_mov_b32 s0, 2
; GCN-DAG: s_mov_b32 s1, 3
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/insert-waits-exp.mir b/llvm/test/CodeGen/MIR/AMDGPU/insert-waits-exp.mir
new file mode 100644
index 00000000000..9aaa374ed28
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AMDGPU/insert-waits-exp.mir
@@ -0,0 +1,63 @@
+# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s
+--- |
+ define amdgpu_ps <4 x float> @exp_done_waitcnt(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) {
+ %a = load volatile float, float addrspace(1)* undef
+ %b = load volatile float, float addrspace(1)* undef
+ %c = load volatile float, float addrspace(1)* undef
+ %d = load volatile float, float addrspace(1)* undef
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %a, float %b, float %c, float %d)
+ ret <4 x float> <float 5.000000e-01, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>
+ }
+
+ declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+ attributes #0 = { readnone }
+ attributes #1 = { nounwind }
+
+...
+---
+
+# CHECK-LABEL: name: exp_done_waitcnt{{$}}
+# CHECK: EXP_DONE
+# CHECK-NEXT: S_WAITCNT 3855
+# CHECK: %vgpr0 = V_MOV_B32
+# CHECK: %vgpr1 = V_MOV_B32
+# CHECK: %vgpr2 = V_MOV_B32
+# CHECK: %vgpr3 = V_MOV_B32
+name: exp_done_waitcnt
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.2):
+ %sgpr3 = S_MOV_B32 61440
+ %sgpr2 = S_MOV_B32 -1
+ %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %vgpr1 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %vgpr2 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ EXP_DONE 0, killed %vgpr0, killed %vgpr1, killed %vgpr2, killed %vgpr3, -1, -1, 15, implicit %exec
+ %vgpr0 = V_MOV_B32_e32 1056964608, implicit %exec
+ %vgpr1 = V_MOV_B32_e32 1065353216, implicit %exec
+ %vgpr2 = V_MOV_B32_e32 1073741824, implicit %exec
+ %vgpr3 = V_MOV_B32_e32 1082130432, implicit %exec
+ SI_RETURN killed %vgpr0, killed %vgpr1, killed %vgpr2, killed %vgpr3
+
+...
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/movrels-bug.mir b/llvm/test/CodeGen/MIR/AMDGPU/movrels-bug.mir
index 6493cc8703e..9c330bc8a6b 100644
--- a/llvm/test/CodeGen/MIR/AMDGPU/movrels-bug.mir
+++ b/llvm/test/CodeGen/MIR/AMDGPU/movrels-bug.mir
@@ -25,7 +25,7 @@ body: |
%m0 = S_MOV_B32 undef %sgpr0
%vgpr1 = V_MOVRELS_B32_e32 undef %vgpr1, implicit %m0, implicit %exec, implicit killed %vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8
%vgpr4 = V_MAC_F32_e32 undef %vgpr0, undef %vgpr0, undef %vgpr4, implicit %exec
- EXP 15, 12, 0, 1, 0, undef %vgpr0, killed %vgpr1, killed %vgpr4, undef %vgpr0, implicit %exec
+ EXP_DONE 15, undef %vgpr0, killed %vgpr1, killed %vgpr4, undef %vgpr0, 0, 0, 12, implicit %exec
S_ENDPGM
...
OpenPOWER on IntegriCloud