summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2015-09-26 02:25:45 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2015-09-26 02:25:45 +0000
commit86095b8decb5782e3ae5b8b2e8bedfd16db370d1 (patch)
treea6164f423a1cd10b13f900639ef14f7b6df8482f
parent583a1f72c7fe102782c28cc7965599cf45ea4770 (diff)
downloadbcm5719-llvm-86095b8decb5782e3ae5b8b2e8bedfd16db370d1.tar.gz
bcm5719-llvm-86095b8decb5782e3ae5b8b2e8bedfd16db370d1.zip
AMDGPU: Fix sched model for VOP2b instructions
Trying to use the version with the explicit output operand would complain because of the missing WriteSALU. I'm not sure why it doesn't complain about this with the implicit VCC def. llvm-svn: 248646
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td13
-rw-r--r--llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll5
2 files changed, 10 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 0a73419dbbe..dcc44b82627 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1613,14 +1613,15 @@ multiclass VOP2b_Helper <vop2 op, string opName, dag outs32, dag outs64,
dag ins32, string asm32, list<dag> pat32,
dag ins64, string asm64, list<dag> pat64,
string revOp, bit HasMods, bit useSGPRInput> {
+ let SchedRW = [Write32Bit, WriteSALU] in {
+ let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
+ defm _e32 : VOP2_m <op, outs32, ins32, asm32, pat32, opName, revOp>;
+ }
- let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
- defm _e32 : VOP2_m <op, outs32, ins32, asm32, pat32, opName, revOp>;
+ defm _e64 : VOP3b_2_3_m <op,
+ outs64, ins64, opName#asm64, pat64, opName, revOp, HasMods, useSGPRInput
+ >;
}
-
- defm _e64 : VOP3b_2_3_m <op,
- outs64, ins64, opName#asm64, pat64, opName, revOp, HasMods, useSGPRInput
- >;
}
multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
index 5a6129aaa3f..35e9ff83463 100644
--- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@@ -155,9 +155,9 @@ define void @reorder_global_load_local_store_global_load(i32 addrspace(1)* %out,
}
; FUNC-LABEL: @reorder_local_offsets
-; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
; CI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
+; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:12
; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:400
; CI: ds_write_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:404
; CI: buffer_store_dword
@@ -181,9 +181,10 @@ define void @reorder_local_offsets(i32 addrspace(1)* nocapture %out, i32 addrspa
}
; FUNC-LABEL: @reorder_global_offsets
-; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
+; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
+; CI: buffer_load_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:12
; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:400
; CI: buffer_store_dword {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:404
; CI: buffer_store_dword
OpenPOWER on IntegriCloud