summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2018-10-22 16:27:27 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2018-10-22 16:27:27 +0000
commit687ec75d10bd860edb194d88d5438dcb1bc6eb92 (patch)
tree89cfd5a61dee8ea2ffaadea0623d22eb28a86278 /llvm/test/CodeGen/AMDGPU/fmaxnum.ll
parentb96181c2bf1d068824c6fd635c0921d0ffd20187 (diff)
downloadbcm5719-llvm-687ec75d10bd860edb194d88d5438dcb1bc6eb92.tar.gz
bcm5719-llvm-687ec75d10bd860edb194d88d5438dcb1bc6eb92.zip
DAG: Change behavior of fminnum/fmaxnum nodes
Introduce new versions that follow the IEEE semantics to help with legalization that may need quieted inputs. There are some regressions from inserting unnecessary canonicalizes when these are matched from fast math fcmp + select which should be fixed in a future commit. llvm-svn: 344914
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/fmaxnum.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/fmaxnum.ll56
1 files changed, 32 insertions, 24 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
index 58b5b5282b0..7e16d1b883a 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
@@ -1,14 +1,26 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
-; GCN-LABEL: {{^}}test_fmax_f32:
-; GCN: v_max_f32_e32
-define amdgpu_kernel void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) #0 {
- %val = call float @llvm.maxnum.f32(float %a, float %b)
+; GCN-LABEL: {{^}}test_fmax_f32_ieee_mode_on:
+; GCN: v_mul_f32_e64 [[QUIET0:v[0-9]+]], 1.0, s{{[0-9]+}}
+; GCN: v_mul_f32_e64 [[QUIET1:v[0-9]+]], 1.0, s{{[0-9]+}}
+; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], [[QUIET1]], [[QUIET0]]
+; GCN-NOT: [[RESULT]]
+; GCN: buffer_store_dword [[RESULT]]
+define amdgpu_kernel void @test_fmax_f32_ieee_mode_on(float addrspace(1)* %out, float %a, float %b) #0 {
+ %val = call float @llvm.maxnum.f32(float %a, float %b) #1
store float %val, float addrspace(1)* %out, align 4
ret void
}
+; GCN-LABEL: {{^}}test_fmax_f32_ieee_mode_off:
+; GCN: v_max_f32_e32 v0, v0, v1
+; GCN-NEXT: ; return
+define amdgpu_ps float @test_fmax_f32_ieee_mode_off(float %a, float %b) #0 {
+ %val = call float @llvm.maxnum.f32(float %a, float %b) #1
+ ret float %val
+}
+
; GCN-LABEL: {{^}}test_fmax_v2f32:
; GCN: v_max_f32_e32
; GCN: v_max_f32_e32
@@ -158,38 +170,34 @@ define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out
ret void
}
-; GCN-LABEL: {{^}}fmax_var_immediate_f32:
+; GCN-LABEL: {{^}}fmax_var_immediate_f32_no_ieee:
; GCN: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
-define amdgpu_kernel void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) #0 {
- %val = call float @llvm.maxnum.f32(float %a, float 2.0)
- store float %val, float addrspace(1)* %out, align 4
- ret void
+define amdgpu_ps float @fmax_var_immediate_f32_no_ieee(float inreg %a) #0 {
+ %val = call float @llvm.maxnum.f32(float %a, float 2.0) #0
+ ret float %val
}
-; GCN-LABEL: {{^}}fmax_immediate_var_f32:
+; GCN-LABEL: {{^}}fmax_immediate_var_f32_no_ieee:
; GCN: v_max_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, 2.0
-define amdgpu_kernel void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) #0 {
- %val = call float @llvm.maxnum.f32(float 2.0, float %a)
- store float %val, float addrspace(1)* %out, align 4
- ret void
+define amdgpu_ps float @fmax_immediate_var_f32_no_ieee(float inreg %a) #0 {
+ %val = call float @llvm.maxnum.f32(float 2.0, float %a) #0
+ ret float %val
}
-; GCN-LABEL: {{^}}fmax_var_literal_f32:
+; GCN-LABEL: {{^}}fmax_var_literal_f32_no_ieee:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; GCN: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
-define amdgpu_kernel void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) #0 {
- %val = call float @llvm.maxnum.f32(float %a, float 99.0)
- store float %val, float addrspace(1)* %out, align 4
- ret void
+define amdgpu_ps float @fmax_var_literal_f32_no_ieee(float inreg %a) #0 {
+ %val = call float @llvm.maxnum.f32(float %a, float 99.0) #0
+ ret float %val
}
-; GCN-LABEL: {{^}}fmax_literal_var_f32:
+; GCN-LABEL: {{^}}fmax_literal_var_f32_no_ieee:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x42c60000
; GCN: v_max_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[REG]]
-define amdgpu_kernel void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) #0 {
- %val = call float @llvm.maxnum.f32(float 99.0, float %a)
- store float %val, float addrspace(1)* %out, align 4
- ret void
+define amdgpu_ps float @fmax_literal_var_f32_no_ieee(float inreg %a) #0 {
+ %val = call float @llvm.maxnum.f32(float 99.0, float %a) #0
+ ret float %val
}
; GCN-LABEL: {{^}}test_func_fmax_v3f32:
OpenPOWER on IntegriCloud