diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-10-22 16:27:27 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-10-22 16:27:27 +0000 |
commit | 687ec75d10bd860edb194d88d5438dcb1bc6eb92 (patch) | |
tree | 89cfd5a61dee8ea2ffaadea0623d22eb28a86278 /llvm/test/CodeGen/AMDGPU/fmax_legacy.ll | |
parent | b96181c2bf1d068824c6fd635c0921d0ffd20187 (diff) | |
download | bcm5719-llvm-687ec75d10bd860edb194d88d5438dcb1bc6eb92.tar.gz bcm5719-llvm-687ec75d10bd860edb194d88d5438dcb1bc6eb92.zip |
DAG: Change behavior of fminnum/fmaxnum nodes
Introduce new versions that follow the IEEE semantics
to help with legalization that may need quieted inputs.
There are some regressions from inserting unnecessary
canonicalizes when these are matched from fast math
fcmp + select which should be fixed in a future commit.
llvm-svn: 344914
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/fmax_legacy.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fmax_legacy.ll | 97 |
1 files changed, 72 insertions, 25 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll index 6a1f7966c30..1fd1556de74 100644 --- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll @@ -1,13 +1,22 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=FUNC %s -; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN-NONAN -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN-SAFE,SI-SAFE,GCN,FUNC %s +; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI-NONAN,GCN-NONAN,GCN,FUNC %s + +; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-SAFE,GCN-SAFE,GCN,FUNC %s +; RUN: llc -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI-NONAN,GCN-NONAN,GCN,FUNC %s + ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s declare i32 @llvm.r600.read.tidig.x() #1 ; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32: -; GCN: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; GCN: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; GCN-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]] +; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] + +; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]] + +; VI-SAFE: v_cmp_nlt_f32_e32 vcc, [[A]], [[B]] +; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]] + ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; EG: MAX @@ -26,12 +35,16 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32(float addrspace(1)* %out, fl } ; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32_nnan_src: -; GCN: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; GCN: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] ; GCN-DAG: v_add_f32_e32 [[ADD_A:v[0-9]+]], 1.0, [[A]] ; GCN-DAG: v_add_f32_e32 [[ADD_B:v[0-9]+]], 2.0, [[B]] -; GCN-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]] +; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[ADD_B]], [[ADD_A]] + +; VI-SAFE: v_cmp_nlt_f32_e32 vcc, [[ADD_A]], [[ADD_B]] +; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[ADD_B]], [[ADD_A]] + ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[ADD_A]], [[ADD_B]] ; EG: MAX @@ -52,9 +65,14 @@ define amdgpu_kernel void @test_fmax_legacy_uge_f32_nnan_src(float addrspace(1)* } ; FUNC-LABEL: {{^}}test_fmax_legacy_oge_f32: -; GCN: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; GCN: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; GCN-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] + +; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] + +; VI-SAFE: v_cmp_ge_f32_e32 vcc, [[A]], [[B]] +; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]] + ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; EG: MAX define amdgpu_kernel void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { @@ -72,9 +90,15 @@ define amdgpu_kernel void @test_fmax_legacy_oge_f32(float addrspace(1)* %out, fl } ; FUNC-LABEL: {{^}}test_fmax_legacy_ugt_f32: -; GCN: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; GCN: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; GCN-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]] +; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] + +; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[B]], [[A]] + +; VI-SAFE: v_cmp_nle_f32_e32 vcc, [[A]], [[B]] +; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]] + + ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; EG: MAX define amdgpu_kernel void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { @@ -92,9 +116,14 @@ define amdgpu_kernel void @test_fmax_legacy_ugt_f32(float addrspace(1)* %out, fl } ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32: -; GCN: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; GCN: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; GCN-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] + +; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] + +; VI-SAFE: v_cmp_gt_f32_e32 vcc, [[A]], [[B]] +; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]] + ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; EG: MAX define amdgpu_kernel void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { @@ -112,9 +141,15 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_f32(float addrspace(1)* %out, fl } ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v1f32: -; GCN: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; GCN: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 -; GCN-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] +; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] + +; SI-SAFE: v_max_legacy_f32_e32 {{v[0-9]+}}, [[A]], [[B]] + +; VI-SAFE: v_cmp_gt_f32_e32 vcc, [[A]], [[B]] +; VI-SAFE: v_cndmask_b32_e32 v{{[0-9]+}}, [[B]], [[A]] + + ; GCN-NONAN: v_max_f32_e32 {{v[0-9]+}}, [[A]], [[B]] ; EG: MAX define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* %out, <1 x float> addrspace(1)* %in) #0 { @@ -132,12 +167,24 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)* } ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_v3f32: -; GCN-SAFE: v_max_legacy_f32_e32 -; GCN-SAFE: v_max_legacy_f32_e32 -; GCN-SAFE: v_max_legacy_f32_e32 +; SI-SAFE: v_max_legacy_f32_e32 +; SI-SAFE: v_max_legacy_f32_e32 +; SI-SAFE: v_max_legacy_f32_e32 + +; VI-SAFE: v_cmp_gt_f32_e32 +; VI-SAFE: v_cndmask_b32_e32 +; VI-SAFE: v_cmp_gt_f32_e32 +; VI-SAFE: v_cndmask_b32_e32 +; VI-SAFE: v_cmp_gt_f32_e32 +; VI-SAFE: v_cndmask_b32_e32 +; VI-SAFE-NOT: v_cmp +; VI-SAFE-NOT: v_cndmask + ; GCN-NONAN: v_max_f32_e32 ; GCN-NONAN: v_max_f32_e32 ; GCN-NONAN: v_max_f32_e32 + +; GCN-NOT: v_max define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() #1 %gep.0 = getelementptr <3 x float>, <3 x float> addrspace(1)* %in, i32 %tid @@ -153,8 +200,8 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v3f32(<3 x float> addrspace(1)* } ; FUNC-LABEL: {{^}}test_fmax_legacy_ogt_f32_multi_use: -; GCN: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}} -; GCN: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4 +; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] +; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] ; GCN-NOT: v_max_ ; GCN: v_cmp_gt_f32 ; GCN-NEXT: v_cndmask_b32 |