diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-09-25 16:58:25 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2015-09-25 16:58:25 +0000 |
| commit | 5f70436c4921cbc0c4f684f465d15f01459c12af (patch) | |
| tree | 3282f22555cc4b355a11755325da4f97c379e1fe /llvm/test/CodeGen/AMDGPU | |
| parent | 548d22c0734d19a91f2c645b7a52803898354f5b (diff) | |
| download | bcm5719-llvm-5f70436c4921cbc0c4f684f465d15f01459c12af.tar.gz bcm5719-llvm-5f70436c4921cbc0c4f684f465d15f01459c12af.zip | |
AMDGPU: Improve accuracy of instruction rates for VOPC
These were all using the default 32-bit VALU write class,
but the i64/f64 compares are half rate.
I'm not sure this is really correct, because they are still using
the write to VALU write class, even though they really write
to the SALU.
llvm-svn: 248582
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/valu-i1.ll | 12 |
2 files changed, 10 insertions, 8 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll index 805a88b59c7..80eb3b93f8e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll @@ -271,7 +271,8 @@ define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 { ; SI: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb ; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} ; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[MASK]] -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI-NOT: vcc +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc ; SI-NEXT: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 { @@ -285,7 +286,8 @@ define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 { ; SI-DAG: buffer_load_dwordx2 [[VA:v\[[0-9]+:[0-9]+\]]] ; SI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}} ; SI: v_cmp_class_f64_e32 vcc, [[VA]], [[MASK]] -; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc +; SI-NOT: vcc +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc ; SI: buffer_store_dword [[RESULT]] ; SI: s_endpgm define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll index 7d0ebd139f5..c27702813a8 100644 --- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll @@ -128,18 +128,18 @@ exit: ; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]] ; SI-DAG: v_cmp_ne_i32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]] ; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]] -; SI: s_and_saveexec_b64 [[ORNEG1]], [[ORNEG1]] -; SI: s_xor_b64 [[ORNEG1]], exec, [[ORNEG1]] +; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]] +; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]] ; SI: s_cbranch_execz BB3_5 ; SI: BB#4: ; SI: buffer_store_dword -; SI: v_cmp_ge_i64_e32 vcc -; SI: s_or_b64 [[COND_STATE]], vcc, [[COND_STATE]] +; SI: v_cmp_ge_i64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]] +; SI: s_or_b64 [[COND_STATE]], [[CMP]], [[COND_STATE]] ; SI: BB3_5: -; SI: s_or_b64 exec, exec, [[ORNEG1]] -; SI: s_or_b64 [[COND_STATE]], [[ORNEG1]], [[COND_STATE]] +; SI: s_or_b64 exec, exec, [[ORNEG2]] +; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[COND_STATE]] ; SI: s_andn2_b64 exec, exec, [[COND_STATE]] ; SI: s_cbranch_execnz BB3_3 |

