diff options
| author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2020-01-13 14:30:21 -0800 |
|---|---|---|
| committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2020-01-14 01:08:30 -0800 |
| commit | eca44745871bc46728903aaa262abc6344d4f959 (patch) | |
| tree | c3710a6c32e95872f16ae2ed96ea2e9248191f35 /llvm/test/CodeGen/AMDGPU | |
| parent | 547abdd921e45fd65a2fa60f21715facb4af31b2 (diff) | |
| download | bcm5719-llvm-eca44745871bc46728903aaa262abc6344d4f959.tar.gz bcm5719-llvm-eca44745871bc46728903aaa262abc6344d4f959.zip | |
[AMDGPU] Fix getInstrLatency() always returning 1
We do not have InstrItinerary, so the generic getInstrLatency() was always
defaulting to returning 1 cycle. We need to use TargetSchedModel instead
to compute an instruction's latency.
Differential Revision: https://reviews.llvm.org/D72655
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/max.i16.ll | 7 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir | 2 |
2 files changed, 5 insertions, 4 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/max.i16.ll b/llvm/test/CodeGen/AMDGPU/max.i16.ll
index 5839eccf608..ff624ec1d0c 100644
--- a/llvm/test/CodeGen/AMDGPU/max.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/max.i16.ll
@@ -175,11 +175,12 @@ define amdgpu_kernel void @v_test_imax_sge_v3i16(<3 x i16> addrspace(1)* %out, <
 ; GFX9-NEXT: s_waitcnt vmcnt(1)
 ; GFX9-NEXT: v_mov_b32_e32 v8, v6
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_max_i16 v6, v6, v7
-; GFX9-NEXT: global_load_short_d16 v7, v[2:3], off offset:4
+; GFX9-NEXT: v_mov_b32_e32 v9, v7
 ; GFX9-NEXT: global_load_short_d16 v8, v[0:1], off offset:4
+; GFX9-NEXT: global_load_short_d16 v9, v[2:3], off offset:4
+; GFX9-NEXT: v_pk_max_i16 v6, v6, v7
 ; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_pk_max_i16 v0, v8, v7
+; GFX9-NEXT: v_pk_max_i16 v0, v8, v9
 ; GFX9-NEXT: global_store_dword v[4:5], v6, off
 ; GFX9-NEXT: global_store_short v[4:5], v0, off offset:4
 ; GFX9-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir b/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir
index b46bee82210..63523167ac1 100644
--- a/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir
+++ b/llvm/test/CodeGen/AMDGPU/scheduler-handle-move-bundle.mir
@@ -23,8 +23,8 @@ body: |
  ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
  ; GCN: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[COPY]], 0, 0, 0 :: (dereferenceable invariant load 4, align 16, addrspace 4)
  ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
  ; GCN: DS_WRITE_B32_gfx9 [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (store 4, addrspace 3)
+ ; GCN: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
  ; GCN: $m0 = S_MOV_B32 0
  ; GCN: $vgpr0 = COPY [[S_LOAD_DWORD_IMM]]
  ; GCN: BUNDLE implicit $vgpr0, implicit $m0, implicit $exec {

