diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-10-22 16:27:27 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-10-22 16:27:27 +0000 |
commit | 687ec75d10bd860edb194d88d5438dcb1bc6eb92 (patch) | |
tree | 89cfd5a61dee8ea2ffaadea0623d22eb28a86278 /llvm/test/CodeGen/AMDGPU/fmaxnum.ll | |
parent | b96181c2bf1d068824c6fd635c0921d0ffd20187 (diff) | |
download | bcm5719-llvm-687ec75d10bd860edb194d88d5438dcb1bc6eb92.tar.gz bcm5719-llvm-687ec75d10bd860edb194d88d5438dcb1bc6eb92.zip |
DAG: Change behavior of fminnum/fmaxnum nodes
Introduce new versions that follow the IEEE semantics
to help with legalization that may need quieted inputs.
There are some regressions from inserting unnecessary
canonicalize operations when these nodes are matched from fast-math
fcmp + select patterns; these regressions should be fixed in a future commit.
llvm-svn: 344914
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/fmaxnum.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fmaxnum.ll | 56 |
1 files changed, 32 insertions, 24 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll index 58b5b5282b0..7e16d1b883a 100644 --- a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll +++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll @@ -1,14 +1,26 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s -; GCN-LABEL: {{^}}test_fmax_f32: -; GCN: v_max_f32_e32 -define amdgpu_kernel void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) #0 { - %val = call float @llvm.maxnum.f32(float %a, float %b) +; GCN-LABEL: {{^}}test_fmax_f32_ieee_mode_on: +; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}} +; GCN: v_mul_f32_e64 [[QUIET1:v[0-9]+]], 1.0, s{{[0-9]+}} +; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[QUIET1]], [[QUIET0]] +; GCN-NOT: [[RESULT]] +; GCN: buffer_store_dword [[RESULT]] +define amdgpu_kernel void @test_fmax_f32_ieee_mode_on(float addrspace(1)* %out, float %a, float %b) #0 { + %val = call float @llvm.maxnum.f32(float %a, float %b) #1 store float %val, float addrspace(1)* %out, align 4 ret void } +; GCN-LABEL: {{^}}test_fmax_f32_ieee_mode_off: +; GCN: v_max_f32_e32 v0, v0, v1 +; GCN-NEXT: ; return +define amdgpu_ps float @test_fmax_f32_ieee_mode_off(float %a, float %b) #0 { + %val = call float @llvm.maxnum.f32(float %a, float %b) #1 + ret float %val +} + ; GCN-LABEL: {{^}}test_fmax_v2f32: ; GCN: v_max_f32_e32 ; GCN: v_max_f32_e32 @@ -158,38 +170,34 @@ define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out ret void } -; GCN-LABEL: {{^}}fmax_var_immediate_f32: +; GCN-LABEL: {{^}}fmax_var_immediate_f32_no_ieee: ; GCN: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0 -define amdgpu_kernel void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) #0 { - %val = call float @llvm.maxnum.f32(float %a, float 2.0) - store float %val, float addrspace(1)* %out, align 4 - 
ret void +define amdgpu_ps float @fmax_var_immediate_f32_no_ieee(float inreg %a) #0 { + %val = call float @llvm.maxnum.f32(float %a, float 2.0) #0 + ret float %val } -; GCN-LABEL: {{^}}fmax_immediate_var_f32: +; GCN-LABEL: {{^}}fmax_immediate_var_f32_no_ieee: ; GCN: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0 -define amdgpu_kernel void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) #0 { - %val = call float @llvm.maxnum.f32(float 2.0, float %a) - store float %val, float addrspace(1)* %out, align 4 - ret void +define amdgpu_ps float @fmax_immediate_var_f32_no_ieee(float inreg %a) #0 { + %val = call float @llvm.maxnum.f32(float 2.0, float %a) #0 + ret float %val } -; GCN-LABEL: {{^}}fmax_var_literal_f32: +; GCN-LABEL: {{^}}fmax_var_literal_f32_no_ieee: ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000 ; GCN: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]] -define amdgpu_kernel void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) #0 { - %val = call float @llvm.maxnum.f32(float %a, float 99.0) - store float %val, float addrspace(1)* %out, align 4 - ret void +define amdgpu_ps float @fmax_var_literal_f32_no_ieee(float inreg %a) #0 { + %val = call float @llvm.maxnum.f32(float %a, float 99.0) #0 + ret float %val } -; GCN-LABEL: {{^}}fmax_literal_var_f32: +; GCN-LABEL: {{^}}fmax_literal_var_f32_no_ieee: ; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000 ; GCN: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]] -define amdgpu_kernel void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) #0 { - %val = call float @llvm.maxnum.f32(float 99.0, float %a) - store float %val, float addrspace(1)* %out, align 4 - ret void +define amdgpu_ps float @fmax_literal_var_f32_no_ieee(float inreg %a) #0 { + %val = call float @llvm.maxnum.f32(float 99.0, float %a) #0 + ret float %val } ; GCN-LABEL: {{^}}test_func_fmax_v3f32: |